Thalesian committed on
Commit
a8202c9
·
verified ·
1 Parent(s): cc6785a

End of training

Browse files
README.md CHANGED
@@ -14,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  This model was trained from scratch on the None dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 0.1577
18
 
19
  ## Model description
20
 
@@ -33,30 +33,27 @@ More information needed
33
  ### Training hyperparameters
34
 
35
  The following hyperparameters were used during training:
36
- - learning_rate: 0.0006047816549758072
37
- - train_batch_size: 8
38
- - eval_batch_size: 8
39
  - seed: 42
40
- - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.964129172421366,0.8471340191802936) and epsilon=1.51279024695782e-08 and optimizer_args=No additional optimizer arguments
41
  - lr_scheduler_type: linear
42
- - lr_scheduler_warmup_steps: 2593
43
- - num_epochs: 500
44
 
45
  ### Training results
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:-----:|:---------------:|
49
- | 0.2814 | 1.0 | 6351 | 0.1742 |
50
- | 0.2857 | 2.0 | 12702 | 0.1582 |
51
- | 0.2872 | 3.0 | 19053 | 0.1529 |
52
- | 0.2733 | 4.0 | 25404 | 0.1528 |
53
- | 0.3071 | 5.0 | 31755 | 0.1805 |
54
- | 0.297 | 6.0 | 38106 | 0.1577 |
55
 
56
 
57
  ### Framework versions
58
 
59
- - Transformers 4.49.0
60
- - Pytorch 2.6.0.dev20241217
61
- - Datasets 2.20.0
62
- - Tokenizers 0.21.0
 
14
 
15
  This model was trained from scratch on the None dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 0.1153
18
 
19
  ## Model description
20
 
 
33
  ### Training hyperparameters
34
 
35
  The following hyperparameters were used during training:
36
+ - learning_rate: 1e-05
37
+ - train_batch_size: 128
38
+ - eval_batch_size: 128
39
  - seed: 42
40
+ - optimizer: Use OptimizerNames.ADAMW_APEX_FUSED with betas=(0.826646043090655,0.991636944120939) and epsilon=3.4341677539323e-07 and optimizer_args=No additional optimizer arguments
41
  - lr_scheduler_type: linear
42
+ - lr_scheduler_warmup_steps: 5000
43
+ - num_epochs: 200
44
 
45
  ### Training results
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:-----:|:---------------:|
49
+ | 0.0237 | 1.0 | 18731 | 0.1124 |
50
+ | 0.0216 | 2.0 | 37462 | 0.1128 |
51
+ | 0.0201 | 3.0 | 56193 | 0.1153 |
 
 
 
52
 
53
 
54
  ### Framework versions
55
 
56
+ - Transformers 4.50.3
57
+ - Pytorch 2.6.0+cu126
58
+ - Datasets 3.3.0
59
+ - Tokenizers 0.21.1
added_tokens.json CHANGED
@@ -1,247 +1,415 @@
1
  {
2
- "±": 32314,
3
- "→": 32189,
4
- "⌜": 32266,
5
- "⌝": 32231,
6
- "โ—": 32179,
7
- "⟦": 32218,
8
- "⟧": 32200,
9
- "๐€€": 32299,
10
- "๐€": 32169,
11
- "๐€‚": 32110,
12
- "๐€ƒ": 32301,
13
- "๐€„": 32261,
14
- "๐€…": 32257,
15
- "๐€†": 32102,
16
- "๐€‡": 32341,
17
- "๐€ˆ": 32136,
18
- "๐€‰": 32187,
19
- "๐€Š": 32286,
20
- "๐€‹": 32127,
21
- "๐€": 32108,
22
- "๐€Ž": 32315,
23
- "๐€": 32162,
24
- "๐€": 32154,
25
- "๐€‘": 32306,
26
- "๐€’": 32152,
27
- "๐€“": 32156,
28
- "๐€”": 32157,
29
- "๐€•": 32239,
30
- "๐€–": 32295,
31
- "๐€—": 32210,
32
- "๐€˜": 32226,
33
- "๐€™": 32340,
34
- "๐€š": 32288,
35
- "๐€›": 32304,
36
- "๐€œ": 32188,
37
- "๐€": 32272,
38
- "๐€ž": 32171,
39
- "๐€Ÿ": 32268,
40
- "๐€ ": 32124,
41
- "๐€ก": 32175,
42
- "๐€ข": 32219,
43
- "๐€ฃ": 32293,
44
- "๐€ค": 32300,
45
- "๐€ฅ": 32287,
46
- "๐€ฆ": 32172,
47
- "๐€จ": 32106,
48
- "๐€ฉ": 32214,
49
- "๐€ช": 32115,
50
- "๐€ซ": 32225,
51
- "๐€ฌ": 32309,
52
- "๐€ญ": 32159,
53
- "๐€ฎ": 32290,
54
- "๐€ฏ": 32237,
55
- "๐€ฐ": 32313,
56
- "๐€ฑ": 32194,
57
- "๐€ฒ": 32281,
58
- "๐€ณ": 32107,
59
- "๐€ด": 32265,
60
- "๐€ต": 32325,
61
- "๐€ถ": 32126,
62
- "๐€ท": 32164,
63
- "๐€ธ": 32130,
64
- "๐€น": 32198,
65
- "๐€บ": 32318,
66
- "๐€ผ": 32263,
67
- "๐€ฝ": 32321,
68
- "๐€ฟ": 32271,
69
- "๐€": 32216,
70
- "๐": 32160,
71
- "๐‚": 32144,
72
- "๐ƒ": 32135,
73
- "๐„": 32331,
74
- "๐…": 32167,
75
- "๐†": 32208,
76
- "๐‡": 32204,
77
- "๐ˆ": 32282,
78
- "๐‰": 32118,
79
- "๐Š": 32213,
80
- "๐‹": 32183,
81
- "๐Œ": 32311,
82
- "๐": 32227,
83
- "๐": 32298,
84
- "๐‘": 32344,
85
- "๐’": 32134,
86
- "๐“": 32149,
87
- "๐”": 32228,
88
- "๐•": 32264,
89
- "๐–": 32180,
90
- "๐—": 32165,
91
- "๐˜": 32101,
92
- "๐™": 32147,
93
- "๐š": 32173,
94
- "๐›": 32117,
95
- "๐œ": 32100,
96
- "๐": 32270,
97
- "๐‚€": 32267,
98
- "๐‚": 32201,
99
- "๐‚‚": 32291,
100
- "๐‚ƒ": 32128,
101
- "๐‚„": 32285,
102
- "๐‚…": 32137,
103
- "๐‚†": 32235,
104
- "๐‚‡": 32255,
105
- "๐‚ˆ": 32199,
106
- "๐‚‰": 32195,
107
- "๐‚Š": 32191,
108
- "๐‚‹": 32249,
109
- "๐‚Œ": 32177,
110
- "๐‚": 32123,
111
- "๐‚Ž": 32302,
112
- "๐‚": 32248,
113
- "๐‚": 32192,
114
- "๐‚‘": 32310,
115
- "๐‚’": 32109,
116
- "๐‚”": 32337,
117
- "๐‚•": 32276,
118
- "๐‚–": 32143,
119
- "๐‚—": 32166,
120
- "๐‚š": 32184,
121
- "๐‚›": 32338,
122
- "๐‚œ": 32292,
123
- "๐‚": 32215,
124
- "๐‚ž": 32247,
125
- "๐‚Ÿ": 32240,
126
- "๐‚ก": 32111,
127
- "๐‚ข": 32168,
128
- "๐‚ฃ": 32233,
129
- "๐‚ฅ": 32125,
130
- "๐‚ฆ": 32206,
131
- "๐‚ง": 32141,
132
- "๐‚จ": 32297,
133
- "๐‚ฉ": 32241,
134
- "๐‚ช": 32185,
135
- "๐‚ซ": 32343,
136
- "๐‚ฌ": 32251,
137
- "๐‚ญ": 32284,
138
- "๐‚ฎ": 32202,
139
- "๐‚ฏ": 32140,
140
- "๐‚ฐ": 32246,
141
- "๐‚ฑ": 32336,
142
- "๐‚ฒ": 32148,
143
- "๐‚ณ": 32256,
144
- "๐‚ด": 32317,
145
- "๐‚ต": 32104,
146
- "๐‚ถ": 32190,
147
- "๐‚ท": 32329,
148
- "๐‚ธ": 32289,
149
- "๐‚น": 32323,
150
- "๐‚บ": 32320,
151
- "๐‚ป": 32280,
152
- "๐‚ผ": 32324,
153
- "๐‚ฝ": 32150,
154
- "๐‚พ": 32230,
155
- "๐‚ฟ": 32296,
156
- "๐ƒ€": 32232,
157
- "๐ƒ": 32129,
158
- "๐ƒ‚": 32155,
159
- "๐ƒ„": 32163,
160
- "๐ƒ†": 32220,
161
- "๐ƒ‡": 32133,
162
- "๐ƒˆ": 32333,
163
- "๐ƒ‰": 32203,
164
- "๐ƒŠ": 32121,
165
- "๐ƒŒ": 32120,
166
- "๐ƒ": 32131,
167
- "๐ƒŽ": 32196,
168
- "๐ƒ": 32158,
169
- "๐ƒ": 32113,
170
- "๐ƒ‘": 32316,
171
- "๐ƒ“": 32258,
172
- "๐ƒ”": 32274,
173
- "๐ƒ•": 32217,
174
- "๐ƒ—": 32322,
175
- "๐ƒ™": 32223,
176
- "๐ƒš": 32260,
177
- "๐ƒœ": 32342,
178
- "๐ƒ": 32283,
179
- "๐ƒž": 32305,
180
- "๐ƒŸ": 32119,
181
- "๐ƒ ": 32193,
182
- "๐ƒก": 32103,
183
- "๐ƒข": 32253,
184
- "๐ƒฃ": 32234,
185
- "๐ƒค": 32312,
186
- "๐ƒฅ": 32153,
187
- "๐ƒฆ": 32146,
188
- "๐ƒง": 32114,
189
- "๐ƒจ": 32209,
190
- "๐ƒฉ": 32139,
191
- "๐ƒช": 32335,
192
- "๐ƒซ": 32151,
193
- "๐ƒฌ": 32332,
194
- "๐ƒญ": 32244,
195
- "๐ƒฎ": 32279,
196
- "๐ƒฏ": 32116,
197
- "๐ƒฐ": 32197,
198
- "๐ƒฑ": 32205,
199
- "๐ƒฒ": 32207,
200
- "๐ƒณ": 32330,
201
- "๐ƒด": 32181,
202
- "๐ƒต": 32211,
203
- "๐ƒถ": 32236,
204
- "๐ƒท": 32254,
205
- "๐ƒธ": 32308,
206
- "๐ƒน": 32339,
207
- "๐„ˆ": 32334,
208
- "๐„‰": 32328,
209
- "๐„Š": 32262,
210
- "๐„‹": 32138,
211
- "๐„Œ": 32178,
212
- "๐„": 32319,
213
- "๐„Ž": 32269,
214
- "๐„": 32122,
215
- "๐„": 32170,
216
- "๐„‘": 32221,
217
- "๐„’": 32275,
218
- "๐„“": 32245,
219
- "๐„”": 32174,
220
- "๐„•": 32224,
221
- "๐„–": 32182,
222
- "๐„—": 32142,
223
- "๐„˜": 32250,
224
- "๐„™": 32212,
225
- "๐„š": 32294,
226
- "๐„›": 32277,
227
- "๐„œ": 32242,
228
- "๐„": 32186,
229
- "๐„ž": 32132,
230
- "๐„Ÿ": 32105,
231
- "๐„ก": 32112,
232
- "๐„ข": 32243,
233
- "๐„ฃ": 32273,
234
- "๐„ค": 32238,
235
- "๐„ฅ": 32145,
236
- "๐„ง": 32259,
237
- "๐„ช": 32327,
238
- "๐„ซ": 32222,
239
- "๐„ท": 32229,
240
- "๐„ธ": 32326,
241
- "๐„น": 32176,
242
- "๐„บ": 32303,
243
- "๐„ผ": 32307,
244
- "๐„ฝ": 32161,
245
- "๐„พ": 32252,
246
- "๐„ฟ": 32278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  }
 
1
  {
2
+ "×": 32424,
3
+ "ŋ": 32303,
4
+ "ʾ": 32444,
5
+ "⌈": 32402,
6
+ "๐’€€": 32121,
7
+ "๐’€": 32445,
8
+ "๐’€‰": 32105,
9
+ "๐’€Š": 32131,
10
+ "๐’€": 32147,
11
+ "๐’€•": 32415,
12
+ "๐’€–": 32219,
13
+ "๐’€˜": 32479,
14
+ "๐’€š": 32156,
15
+ "๐’€œ": 32183,
16
+ "๐’€": 32244,
17
+ "๐’€ž": 32176,
18
+ "๐’€ ": 32369,
19
+ "๐’€ฉ": 32323,
20
+ "๐’€ช": 32364,
21
+ "๐’€ซ": 32228,
22
+ "๐’€ฌ": 32207,
23
+ "๐’€ญ": 32405,
24
+ "๐’€ฎ": 32242,
25
+ "๐’€ฏ": 32123,
26
+ "๐’€ฒ": 32391,
27
+ "๐’€ณ": 32221,
28
+ "๐’€ด": 32142,
29
+ "๐’€ธ": 32220,
30
+ "๐’€น": 32448,
31
+ "๐’€พ": 32417,
32
+ "๐’€ฟ": 32465,
33
+ "๐’€": 32175,
34
+ "๐’": 32299,
35
+ "๐’ƒ": 32494,
36
+ "๐’„": 32265,
37
+ "๐’†": 32483,
38
+ "๐’‡": 32421,
39
+ "๐’ˆ": 32288,
40
+ "๐’‰": 32174,
41
+ "๐’": 32387,
42
+ "๐’‘": 32294,
43
+ "๐’“": 32326,
44
+ "๐’”": 32342,
45
+ "๐’•": 32172,
46
+ "๐’–": 32161,
47
+ "๐’œ": 32447,
48
+ "๐’ฆ": 32497,
49
+ "๐’ฎ": 32141,
50
+ "๐’ฏ": 32206,
51
+ "๐’ฐ": 32453,
52
+ "๐’ฒ": 32130,
53
+ "๐’ณ": 32280,
54
+ "๐’ด": 32311,
55
+ "๐’ต": 32367,
56
+ "๐’ถ": 32251,
57
+ "๐’ท": 32232,
58
+ "๐’น": 32135,
59
+ "๐’บ": 32297,
60
+ "๐’ป": 32502,
61
+ "๐’ผ": 32396,
62
+ "๐’ฝ": 32499,
63
+ "๐’พ": 32340,
64
+ "๐’‚€": 32488,
65
+ "๐’‚": 32198,
66
+ "๐’‚‚": 32143,
67
+ "๐’‚ƒ": 32506,
68
+ "๐’‚„": 32270,
69
+ "๐’‚…": 32108,
70
+ "๐’‚†": 32487,
71
+ "๐’‚‡": 32249,
72
+ "๐’‚ˆ": 32234,
73
+ "๐’‚‰": 32259,
74
+ "๐’‚Š": 32363,
75
+ "๐’‚": 32245,
76
+ "๐’‚”": 32171,
77
+ "๐’‚•": 32380,
78
+ "๐’‚–": 32209,
79
+ "๐’‚—": 32160,
80
+ "๐’‚™": 32144,
81
+ "๐’‚ž": 32456,
82
+ "๐’‚Ÿ": 32413,
83
+ "๐’‚ ": 32378,
84
+ "๐’‚ก": 32401,
85
+ "๐’‚ค": 32468,
86
+ "๐’‚ฆ": 32301,
87
+ "๐’‚ฌ": 32508,
88
+ "๐’‚ฎ": 32443,
89
+ "๐’‚ต": 32252,
90
+ "๐’‚ท": 32269,
91
+ "๐’‚ผ": 32185,
92
+ "๐’ƒŒ": 32336,
93
+ "๐’ƒž": 32429,
94
+ "๐’ƒก": 32510,
95
+ "๐’ƒข": 32461,
96
+ "๐’ƒฃ": 32275,
97
+ "๐’ƒฎ": 32129,
98
+ "๐’ƒฐ": 32110,
99
+ "๐’ƒฒ": 32103,
100
+ "๐’ƒด": 32477,
101
+ "๐’ƒต": 32267,
102
+ "๐’ƒถ": 32390,
103
+ "๐’ƒท": 32100,
104
+ "๐’ƒธ": 32455,
105
+ "๐’ƒป": 32273,
106
+ "๐’ƒผ": 32375,
107
+ "๐’ƒฝ": 32307,
108
+ "๐’ƒพ": 32261,
109
+ "๐’„€": 32149,
110
+ "๐’„ƒ": 32305,
111
+ "๐’„„": 32481,
112
+ "๐’„†": 32482,
113
+ "๐’„‡": 32309,
114
+ "๐’„ˆ": 32404,
115
+ "๐’„‰": 32476,
116
+ "๐’„Š": 32127,
117
+ "๐’„‹": 32423,
118
+ "๐’„‘": 32139,
119
+ "๐’„’": 32411,
120
+ "๐’„–": 32324,
121
+ "๐’„—": 32117,
122
+ "๐’„˜": 32268,
123
+ "๐’„™": 32434,
124
+ "๐’„›": 32485,
125
+ "๐’„": 32509,
126
+ "๐’„ž": 32210,
127
+ "๐’„ ": 32355,
128
+ "๐’„ข": 32193,
129
+ "๐’„ฃ": 32203,
130
+ "๐’„ค": 32406,
131
+ "๐’„ฅ": 32113,
132
+ "๐’„ฆ": 32432,
133
+ "๐’„ง": 32189,
134
+ "๐’„จ": 32165,
135
+ "๐’„ฉ": 32202,
136
+ "๐’„ซ": 32358,
137
+ "๐’„ฌ": 32140,
138
+ "๐’„ญ": 32238,
139
+ "๐’„ฏ": 32361,
140
+ "๐’„ฐ": 32241,
141
+ "๐’„ฒ": 32503,
142
+ "๐’„ด": 32354,
143
+ "๐’„ต": 32322,
144
+ "๐’„ท": 32166,
145
+ "๐’„ธ": 32505,
146
+ "๐’„ฝ": 32383,
147
+ "๐’„พ": 32151,
148
+ "๐’„ฟ": 32295,
149
+ "๐’…€": 32395,
150
+ "๐’…": 32418,
151
+ "๐’…‚": 32351,
152
+ "๐’……": 32180,
153
+ "๐’…†": 32337,
154
+ "๐’…‡": 32376,
155
+ "๐’…ˆ": 32359,
156
+ "๐’…Š": 32460,
157
+ "๐’…‹": 32145,
158
+ "๐’…": 32212,
159
+ "๐’…Ž": 32385,
160
+ "๐’…“": 32449,
161
+ "๐’…”": 32237,
162
+ "๐’…•": 32195,
163
+ "๐’…–": 32122,
164
+ "๐’…—": 32155,
165
+ "๐’…˜": 32360,
166
+ "๐’…œ": 32240,
167
+ "๐’…ข": 32439,
168
+ "๐’…ค": 32412,
169
+ "๐’…ฅ": 32200,
170
+ "๐’…ฎ": 32484,
171
+ "๐’…ด": 32253,
172
+ "๐’…ธ": 32222,
173
+ "๐’…ป": 32116,
174
+ "๐’…พ": 32462,
175
+ "๐’†ƒ": 32315,
176
+ "๐’†": 32325,
177
+ "๐’†": 32178,
178
+ "๐’†": 32469,
179
+ "๐’†‘": 32126,
180
+ "๐’†’": 32134,
181
+ "๐’†“": 32491,
182
+ "๐’†•": 32137,
183
+ "๐’†—": 32343,
184
+ "๐’†˜": 32492,
185
+ "๐’†š": 32422,
186
+ "๐’†›": 32472,
187
+ "๐’†œ": 32302,
188
+ "๐’†Ÿ": 32328,
189
+ "๐’† ": 32398,
190
+ "๐’†ข": 32128,
191
+ "๐’†ค": 32196,
192
+ "๐’†ฅ": 32262,
193
+ "๐’†ฆ": 32266,
194
+ "๐’†ง": 32235,
195
+ "๐’†ช": 32146,
196
+ "๐’†ฌ": 32416,
197
+ "๐’†ญ": 32349,
198
+ "๐’†ฏ": 32489,
199
+ "๐’†ฐ": 32285,
200
+ "๐’†ฒ": 32283,
201
+ "๐’†ณ": 32257,
202
+ "๐’†ต": 32426,
203
+ "๐’†ท": 32194,
204
+ "๐’†ธ": 32353,
205
+ "๐’†น": 32304,
206
+ "๐’‡€": 32495,
207
+ "๐’‡…": 32112,
208
+ "๐’‡†": 32478,
209
+ "๐’‡‡": 32260,
210
+ "๐’‡‰": 32496,
211
+ "๐’‡’": 32441,
212
+ "๐’‡ก": 32236,
213
+ "๐’‡ฅ": 32230,
214
+ "๐’‡ง": 32392,
215
+ "๐’‡ฌ": 32500,
216
+ "๐’‡ญ": 32450,
217
+ "๐’‡ฏ": 32227,
218
+ "๐’‡ฒ": 32403,
219
+ "๐’‡ณ": 32427,
220
+ "๐’‡ด": 32470,
221
+ "๐’‡ต": 32473,
222
+ "๐’‡ท": 32345,
223
+ "๐’‡ธ": 32119,
224
+ "๐’‡น": 32410,
225
+ "๐’‡บ": 32339,
226
+ "๐’‡ป": 32314,
227
+ "๐’‡ผ": 32504,
228
+ "๐’‡ฝ": 32306,
229
+ "๐’‡ฟ": 32329,
230
+ "๐’ˆ•": 32184,
231
+ "๐’ˆ–": 32438,
232
+ "๐’ˆ—": 32258,
233
+ "๐’ˆ›": 32286,
234
+ "๐’ˆœ": 32247,
235
+ "๐’ˆ": 32352,
236
+ "๐’ˆ ": 32321,
237
+ "๐’ˆข": 32454,
238
+ "๐’ˆฃ": 32163,
239
+ "๐’ˆค": 32168,
240
+ "๐’ˆฅ": 32313,
241
+ "๐’ˆฆ": 32188,
242
+ "๐’ˆง": 32138,
243
+ "๐’ˆจ": 32366,
244
+ "๐’ˆฉ": 32409,
245
+ "๐’ˆช": 32223,
246
+ "๐’ˆซ": 32271,
247
+ "๐’ˆฌ": 32208,
248
+ "๐’ˆญ": 32279,
249
+ "๐’ˆฎ": 32397,
250
+ "๐’ˆฏ": 32152,
251
+ "๐’ˆฒ": 32381,
252
+ "๐’ˆน": 32437,
253
+ "๐’ˆป": 32486,
254
+ "๐’ˆฝ": 32312,
255
+ "๐’ˆพ": 32310,
256
+ "๐’ˆฟ": 32463,
257
+ "๐’‰€": 32498,
258
+ "๐’‰„": 32319,
259
+ "๐’‰…": 32278,
260
+ "๐’‰†": 32224,
261
+ "๐’‰‡": 32132,
262
+ "๐’‰ˆ": 32111,
263
+ "๐’‰‹": 32106,
264
+ "๐’‰Œ": 32169,
265
+ "๐’‰Ž": 32250,
266
+ "๐’‰": 32162,
267
+ "๐’‰": 32436,
268
+ "๐’‰‘": 32428,
269
+ "๐’‰’": 32474,
270
+ "๐’‰“": 32451,
271
+ "๐’‰˜": 32107,
272
+ "๐’‰š": 32333,
273
+ "๐’‰ ": 32320,
274
+ "๐’‰ก": 32290,
275
+ "๐’‰ข": 32493,
276
+ "๐’‰ฃ": 32199,
277
+ "๐’‰ช": 32159,
278
+ "๐’‰ญ": 32243,
279
+ "๐’‰บ": 32308,
280
+ "๐’‰ป": 32211,
281
+ "๐’‰ผ": 32512,
282
+ "๐’‰ฝ": 32102,
283
+ "๐’‰พ": 32226,
284
+ "๐’‰ฟ": 32186,
285
+ "๐’ŠŠ": 32104,
286
+ "๐’ŠŒ": 32407,
287
+ "๐’Š": 32300,
288
+ "๐’Š": 32281,
289
+ "๐’А": 32382,
290
+ "๐’Š‘": 32292,
291
+ "๐’Š’": 32341,
292
+ "๐’Š“": 32158,
293
+ "๐’Š•": 32217,
294
+ "๐’Šš": 32459,
295
+ "๐’Šจ": 32471,
296
+ "๐’Šฉ": 32214,
297
+ "๐’Šฌ": 32389,
298
+ "๐’Šญ": 32182,
299
+ "๐’Šฎ": 32239,
300
+ "๐’Šฏ": 32467,
301
+ "๐’Šท": 32457,
302
+ "๐’Šน": 32204,
303
+ "๐’Šบ": 32191,
304
+ "๐’Šป": 32347,
305
+ "๐’Šฟ": 32350,
306
+ "๐’‹€": 32231,
307
+ "๐’‹": 32187,
308
+ "๐’‹ƒ": 32330,
309
+ "๐’‹†": 32296,
310
+ "๐’‹‡": 32480,
311
+ "๐’‹‹": 32501,
312
+ "๐’‹“": 32490,
313
+ "๐’‹—": 32408,
314
+ "๐’‹™": 32216,
315
+ "๐’‹š": 32357,
316
+ "๐’‹›": 32372,
317
+ "๐’‹œ": 32373,
318
+ "๐’‹": 32346,
319
+ "๐’‹ž": 32327,
320
+ "๐’‹ ": 32274,
321
+ "๐’‹ก": 32173,
322
+ "๐’‹ข": 32136,
323
+ "๐’‹ค": 32414,
324
+ "๐’‹ฅ": 32289,
325
+ "๐’‹ฆ": 32379,
326
+ "๐’‹ง": 32170,
327
+ "๐’‹ฉ": 32374,
328
+ "๐’‹ซ": 32368,
329
+ "๐’‹ฌ": 32133,
330
+ "๐’‹ญ": 32293,
331
+ "๐’‹ฐ": 32388,
332
+ "๐’‹ณ": 32338,
333
+ "๐’‹บ": 32466,
334
+ "๐’‹ป": 32356,
335
+ "๐’‹ผ": 32335,
336
+ "๐’‹ฝ": 32440,
337
+ "๐’‹พ": 32154,
338
+ "๐’Œ€": 32344,
339
+ "๐’Œ": 32287,
340
+ "๐’Œ…": 32316,
341
+ "๐’Œ†": 32365,
342
+ "๐’Œ‡": 32101,
343
+ "๐’Œˆ": 32318,
344
+ "๐’Œ‰": 32190,
345
+ "๐’Œ‹": 32177,
346
+ "๐’ŒŒ": 32157,
347
+ "๐’Œ": 32150,
348
+ "๐’Œ‘": 32120,
349
+ "๐’Œ’": 32377,
350
+ "๐’Œ“": 32400,
351
+ "๐’Œ”": 32148,
352
+ "๐’Œ—": 32317,
353
+ "๐’Œœ": 32218,
354
+ "๐’Œ": 32277,
355
+ "๐’Œข": 32256,
356
+ "๐’Œฃ": 32371,
357
+ "๐’Œค": 32464,
358
+ "๐’Œฆ": 32197,
359
+ "๐’Œจ": 32386,
360
+ "๐’Œซ": 32233,
361
+ "๐’Œด": 32446,
362
+ "๐’Œต": 32124,
363
+ "๐’Œถ": 32442,
364
+ "๐’Œท": 32164,
365
+ "๐’Œบ": 32167,
366
+ "๐’€": 32425,
367
+ "๐’‚": 32435,
368
+ "๐’‡": 32284,
369
+ "๐’": 32264,
370
+ "๐’Ž": 32431,
371
+ "๐’": 32109,
372
+ "๐’‘": 32334,
373
+ "๐’’": 32118,
374
+ "๐’—": 32125,
375
+ "๐’š": 32430,
376
+ "๐’œ": 32254,
377
+ "๐’": 32229,
378
+ "๐’ž": 32507,
379
+ "๐’ ": 32205,
380
+ "๐’ข": 32179,
381
+ "๐’ฃ": 32394,
382
+ "๐’ค": 32458,
383
+ "๐’ฅ": 32255,
384
+ "๐’ฆ": 32114,
385
+ "๐’จ": 32213,
386
+ "๐’ฉ": 32475,
387
+ "๐’ช": 32201,
388
+ "๐’ฌ": 32511,
389
+ "๐’ฎ": 32291,
390
+ "๐’ˆ": 32298,
391
+ "๐’‰": 32192,
392
+ "๐’Š": 32419,
393
+ "๐’‹": 32181,
394
+ "๐’Œ": 32115,
395
+ "๐’": 32282,
396
+ "๐’": 32393,
397
+ "๐’": 32246,
398
+ "๐’•": 32348,
399
+ "๐’–": 32248,
400
+ "๐’—": 32215,
401
+ "๐’š": 32433,
402
+ "๐’ž": 32384,
403
+ "๐’ป": 32331,
404
+ "๐’‘†": 32332,
405
+ "๐’‘": 32420,
406
+ "๐’‘": 32276,
407
+ "๐’‘‘": 32370,
408
+ "๐’‘’": 32263,
409
+ "๐’‘”": 32153,
410
+ "๐’‘–": 32225,
411
+ "๐’‘š": 32362,
412
+ "๐’‘›": 32272,
413
+ "๐’‘ฑ": 32452,
414
+ "๐’‘ณ": 32399
415
  }
config.json CHANGED
@@ -1,12 +1,11 @@
1
  {
2
- "_name_or_path": "/Users/lee/GitHub/results/GMY-T5Small/train_1/checkpoint-65840",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
6
  "classifier_dropout": 0.0,
7
- "d_ff": 2048,
8
  "d_kv": 64,
9
- "d_model": 512,
10
  "decoder_start_token_id": 0,
11
  "dense_act_fn": "relu",
12
  "dropout_rate": 0.1,
@@ -18,9 +17,9 @@
18
  "layer_norm_epsilon": 1e-06,
19
  "model_type": "t5",
20
  "n_positions": 512,
21
- "num_decoder_layers": 6,
22
- "num_heads": 8,
23
- "num_layers": 6,
24
  "output_past": true,
25
  "pad_token_id": 0,
26
  "relative_attention_max_distance": 128,
@@ -55,7 +54,7 @@
55
  }
56
  },
57
  "torch_dtype": "float32",
58
- "transformers_version": "4.49.0",
59
  "use_cache": true,
60
- "vocab_size": 32345
61
  }
 
1
  {
 
2
  "architectures": [
3
  "T5ForConditionalGeneration"
4
  ],
5
  "classifier_dropout": 0.0,
6
+ "d_ff": 3072,
7
  "d_kv": 64,
8
+ "d_model": 768,
9
  "decoder_start_token_id": 0,
10
  "dense_act_fn": "relu",
11
  "dropout_rate": 0.1,
 
17
  "layer_norm_epsilon": 1e-06,
18
  "model_type": "t5",
19
  "n_positions": 512,
20
+ "num_decoder_layers": 12,
21
+ "num_heads": 12,
22
+ "num_layers": 12,
23
  "output_past": true,
24
  "pad_token_id": 0,
25
  "relative_attention_max_distance": 128,
 
54
  }
55
  },
56
  "torch_dtype": "float32",
57
+ "transformers_version": "4.50.3",
58
  "use_cache": true,
59
+ "vocab_size": 32513
60
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "decoder_start_token_id": 0,
3
  "eos_token_id": 1,
4
  "pad_token_id": 0,
5
- "transformers_version": "4.49.0"
6
  }
 
2
  "decoder_start_token_id": 0,
3
  "eos_token_id": 1,
4
  "pad_token_id": 0,
5
+ "transformers_version": "4.50.3"
6
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb7f5acc0e0a8661f5cfc17f326e922b7a39a32e92f43973c0b13a9bb42bed48
3
- size 242486312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b355c8952aa6b646e2ceaedda7950541d09bfbea54f24486af95e5935373c5a
3
+ size 892827432
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4645dfb7555ec9f79235ff2195ef1775751eab22cd17671731d376a2edca89ab
3
- size 2466261
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f1a24a97d5195455dd1fbf567d54e18c575a09f3b208ca957c2d319909b36be
3
+ size 2496502
tokenizer_config.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:226471339474e6ac717acdd053bf20542f82bc2c6871f4cdc128bed5acd7516c
3
- size 62465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3757494a511ac5c6706bf61d0b94c86e25b0c88c477932122634597858deb533
3
+ size 91055
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:332ee1e3d81fcfdb71d17c2ea4ccecf168f3c2a602aad1e2850c9c02f1268160
3
- size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9643441c6cbb1024a83de6334eb0c291be2906f1780a037691fe0da36f334e10
3
+ size 5496