Thalesian committed on
Commit
b211dfe
·
verified ·
1 Parent(s): 0677cfb

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,5 @@
1
  ---
 
2
  tags:
3
  - generated_from_trainer
4
  model-index:
@@ -13,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on the None dataset.
15
  It achieves the following results on the evaluation set:
16
- - Loss: 0.1708
17
 
18
  ## Model description
19
 
@@ -32,33 +33,27 @@ More information needed
32
  ### Training hyperparameters
33
 
34
  The following hyperparameters were used during training:
35
- - learning_rate: 5e-06
36
- - train_batch_size: 36
37
- - eval_batch_size: 36
38
  - seed: 42
39
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
40
  - lr_scheduler_type: linear
41
- - lr_scheduler_warmup_steps: 10000
42
- - num_epochs: 200
43
 
44
  ### Training results
45
 
46
  | Training Loss | Epoch | Step | Validation Loss |
47
  |:-------------:|:-----:|:-----:|:---------------:|
48
- | 0.4186 | 1.0 | 1840 | 1.7597 |
49
- | 0.0999 | 2.0 | 3680 | 0.4629 |
50
- | 0.0711 | 3.0 | 5520 | 0.2730 |
51
- | 0.059 | 4.0 | 7360 | 0.1899 |
52
- | 0.0474 | 5.0 | 9200 | 0.1696 |
53
- | 0.0469 | 6.0 | 11040 | 0.1704 |
54
- | 0.0437 | 7.0 | 12880 | 0.1680 |
55
- | 0.0429 | 8.0 | 14720 | 0.1691 |
56
- | 0.0436 | 9.0 | 16560 | 0.1708 |
57
 
58
 
59
  ### Framework versions
60
 
61
- - Transformers 4.44.0.dev0
62
  - Pytorch 2.6.0.dev20241217
63
  - Datasets 2.20.0
64
- - Tokenizers 0.19.1
 
1
  ---
2
+ library_name: transformers
3
  tags:
4
  - generated_from_trainer
5
  model-index:
 
14
 
15
  This model was trained from scratch on the None dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 0.2359
18
 
19
  ## Model description
20
 
 
33
  ### Training hyperparameters
34
 
35
  The following hyperparameters were used during training:
36
+ - learning_rate: 0.0006047816549758072
37
+ - train_batch_size: 8
38
+ - eval_batch_size: 8
39
  - seed: 42
40
+ - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.964129172421366,0.8471340191802936) and epsilon=1.51279024695782e-08 and optimizer_args=No additional optimizer arguments
41
  - lr_scheduler_type: linear
42
+ - lr_scheduler_warmup_steps: 2593
43
+ - num_epochs: 500
44
 
45
  ### Training results
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:-----:|:---------------:|
49
+ | 0.2747 | 1.0 | 6351 | 0.1538 |
50
+ | 0.2793 | 2.0 | 12702 | 0.1542 |
51
+ | 0.2894 | 3.0 | 19053 | 0.2359 |
 
 
 
 
 
 
52
 
53
 
54
  ### Framework versions
55
 
56
+ - Transformers 4.49.0
57
  - Pytorch 2.6.0.dev20241217
58
  - Datasets 2.20.0
59
+ - Tokenizers 0.21.0
added_tokens.json CHANGED
@@ -1,248 +1,247 @@
1
  {
2
- "±": 32276,
3
- "→": 32184,
4
- "โŒœ": 32163,
5
- "โŒ": 32144,
6
- "โ—": 32321,
7
- "โŸฆ": 32233,
8
- "โŸง": 32295,
9
- "๐€€": 32195,
10
- "๐€": 32270,
11
- "๐€‚": 32181,
12
- "๐€ƒ": 32302,
13
- "๐€„": 32248,
14
- "๐€…": 32228,
15
- "๐€†": 32204,
16
- "๐€‡": 32310,
17
- "๐€ˆ": 32326,
18
- "๐€‰": 32281,
19
- "๐€Š": 32111,
20
- "๐€‹": 32238,
21
- "๐€": 32196,
22
- "๐€Ž": 32173,
23
- "๐€": 32199,
24
- "๐€": 32297,
25
- "๐€‘": 32191,
26
- "๐€’": 32104,
27
- "๐€“": 32234,
28
- "๐€”": 32283,
29
- "๐€•": 32132,
30
- "๐€–": 32227,
31
- "๐€—": 32215,
32
- "๐€˜": 32306,
33
- "๐€™": 32193,
34
- "๐€š": 32219,
35
- "๐€›": 32330,
36
- "๐€œ": 32300,
37
- "๐€": 32143,
38
- "๐€ž": 32293,
39
- "๐€Ÿ": 32338,
40
- "๐€ ": 32171,
41
- "๐€ก": 32246,
42
- "๐€ข": 32258,
43
- "๐€ฃ": 32182,
44
- "๐€ค": 32155,
45
- "๐€ฅ": 32209,
46
- "๐€ฆ": 32141,
47
- "๐€จ": 32315,
48
- "๐€ฉ": 32136,
49
- "๐€ช": 32139,
50
- "๐€ซ": 32266,
51
- "๐€ฌ": 32220,
52
- "๐€ญ": 32203,
53
- "๐€ฎ": 32116,
54
- "๐€ฏ": 32124,
55
- "๐€ฐ": 32296,
56
- "๐€ฑ": 32334,
57
- "๐€ฒ": 32287,
58
- "๐€ณ": 32103,
59
- "๐€ด": 32202,
60
- "๐€ต": 32229,
61
- "๐€ถ": 32113,
62
- "๐€ท": 32255,
63
- "๐€ธ": 32187,
64
- "๐€น": 32211,
65
- "๐€บ": 32249,
66
- "๐€ผ": 32254,
67
- "๐€ฝ": 32301,
68
- "๐€ฟ": 32210,
69
- "๐€": 32218,
70
- "๐": 32117,
71
- "๐‚": 32162,
72
- "๐ƒ": 32178,
73
- "๐„": 32190,
74
- "๐…": 32168,
75
- "๐†": 32134,
76
- "๐‡": 32123,
77
- "๐ˆ": 32110,
78
- "๐‰": 32277,
79
- "๐Š": 32333,
80
- "๐‹": 32235,
81
- "๐Œ": 32305,
82
- "๐": 32325,
83
- "๐": 32291,
84
- "๐‘": 32152,
85
- "๐’": 32183,
86
- "๐“": 32328,
87
- "๐”": 32274,
88
- "๐•": 32304,
89
- "๐–": 32292,
90
- "๐—": 32331,
91
- "๐˜": 32336,
92
- "๐™": 32201,
93
- "๐š": 32118,
94
- "๐›": 32282,
95
- "๐œ": 32114,
96
- "๐": 32284,
97
- "๐‚€": 32128,
98
- "๐‚": 32174,
99
- "๐‚‚": 32157,
100
- "๐‚ƒ": 32180,
101
- "๐‚„": 32129,
102
- "๐‚…": 32243,
103
- "๐‚†": 32320,
104
- "๐‚‡": 32253,
105
- "๐‚ˆ": 32261,
106
- "๐‚‰": 32247,
107
- "๐‚Š": 32311,
108
- "๐‚‹": 32151,
109
- "๐‚Œ": 32179,
110
- "๐‚": 32275,
111
- "๐‚Ž": 32316,
112
- "๐‚": 32250,
113
- "๐‚": 32186,
114
- "๐‚‘": 32327,
115
- "๐‚’": 32225,
116
- "๐‚”": 32236,
117
- "๐‚•": 32329,
118
- "๐‚–": 32341,
119
- "๐‚—": 32122,
120
- "๐‚š": 32245,
121
- "๐‚›": 32102,
122
- "๐‚œ": 32332,
123
- "๐‚": 32322,
124
- "๐‚ž": 32205,
125
- "๐‚Ÿ": 32137,
126
- "๐‚ก": 32165,
127
- "๐‚ข": 32154,
128
- "๐‚ฃ": 32299,
129
- "๐‚ฅ": 32323,
130
- "๐‚ฆ": 32192,
131
- "๐‚ง": 32140,
132
- "๐‚จ": 32164,
133
- "๐‚ฉ": 32115,
134
- "๐‚ช": 32214,
135
- "๐‚ซ": 32231,
136
- "๐‚ฌ": 32232,
137
- "๐‚ญ": 32288,
138
- "๐‚ฎ": 32101,
139
- "๐‚ฏ": 32206,
140
- "๐‚ฐ": 32264,
141
- "๐‚ฑ": 32172,
142
- "๐‚ฒ": 32212,
143
- "๐‚ณ": 32268,
144
- "๐‚ด": 32133,
145
- "๐‚ต": 32112,
146
- "๐‚ถ": 32127,
147
- "๐‚ท": 32257,
148
- "๐‚ธ": 32242,
149
- "๐‚น": 32313,
150
- "๐‚บ": 32142,
151
- "๐‚ป": 32138,
152
- "๐‚ผ": 32175,
153
- "๐‚ฝ": 32230,
154
- "๐‚พ": 32273,
155
- "๐‚ฟ": 32240,
156
- "๐ƒ€": 32109,
157
- "๐ƒ": 32344,
158
- "๐ƒ‚": 32105,
159
- "๐ƒ„": 32149,
160
- "๐ƒ†": 32216,
161
- "๐ƒ‡": 32263,
162
- "๐ƒˆ": 32312,
163
- "๐ƒ‰": 32125,
164
- "๐ƒŠ": 32221,
165
- "๐ƒŒ": 32318,
166
- "๐ƒ": 32213,
167
- "๐ƒŽ": 32239,
168
- "๐ƒ": 32335,
169
- "๐ƒ": 32290,
170
- "๐ƒ‘": 32197,
171
- "๐ƒ“": 32100,
172
- "๐ƒ”": 32161,
173
- "๐ƒ•": 32207,
174
- "๐ƒ—": 32251,
175
- "๐ƒ™": 32267,
176
- "๐ƒš": 32226,
177
- "๐ƒ›": 32272,
178
- "๐ƒœ": 32224,
179
- "๐ƒ": 32298,
180
- "๐ƒž": 32170,
181
- "๐ƒŸ": 32339,
182
- "๐ƒ ": 32166,
183
- "๐ƒก": 32107,
184
- "๐ƒข": 32271,
185
- "๐ƒฃ": 32317,
186
- "๐ƒค": 32135,
187
- "๐ƒฅ": 32309,
188
- "๐ƒฆ": 32252,
189
- "๐ƒง": 32237,
190
- "๐ƒจ": 32194,
191
- "๐ƒฉ": 32177,
192
- "๐ƒช": 32185,
193
- "๐ƒซ": 32223,
194
- "๐ƒฌ": 32269,
195
- "๐ƒญ": 32189,
196
- "๐ƒฎ": 32150,
197
- "๐ƒฏ": 32167,
198
- "๐ƒฐ": 32106,
199
- "๐ƒฑ": 32222,
200
- "๐ƒฒ": 32153,
201
- "๐ƒณ": 32121,
202
- "๐ƒด": 32208,
203
- "๐ƒต": 32345,
204
- "๐ƒถ": 32265,
205
- "๐ƒท": 32126,
206
- "๐ƒธ": 32198,
207
- "๐ƒน": 32131,
208
- "๐„ˆ": 32280,
209
- "๐„‰": 32286,
210
- "๐„Š": 32342,
211
- "๐„‹": 32285,
212
- "๐„Œ": 32217,
213
- "๐„": 32259,
214
- "๐„Ž": 32256,
215
- "๐„": 32340,
216
- "๐„": 32120,
217
- "๐„‘": 32188,
218
- "๐„’": 32260,
219
- "๐„“": 32147,
220
- "๐„”": 32119,
221
- "๐„•": 32308,
222
- "๐„–": 32160,
223
- "๐„—": 32337,
224
- "๐„˜": 32176,
225
- "๐„™": 32294,
226
- "๐„š": 32289,
227
- "๐„›": 32319,
228
- "๐„œ": 32159,
229
- "๐„": 32200,
230
- "๐„ž": 32244,
231
- "๐„Ÿ": 32156,
232
- "๐„ก": 32130,
233
- "๐„ข": 32314,
234
- "๐„ฃ": 32324,
235
- "๐„ค": 32108,
236
- "๐„ฅ": 32343,
237
- "๐„ง": 32158,
238
- "๐„ช": 32307,
239
- "๐„ซ": 32169,
240
- "๐„ท": 32145,
241
- "๐„ธ": 32262,
242
- "๐„น": 32279,
243
- "๐„บ": 32146,
244
- "๐„ผ": 32148,
245
- "๐„ฝ": 32278,
246
- "๐„พ": 32241,
247
- "๐„ฟ": 32303
248
  }
 
1
  {
2
+ "±": 32201,
3
+ "→": 32203,
4
+ "โŒœ": 32302,
5
+ "โŒ": 32227,
6
+ "โ—": 32274,
7
+ "โŸฆ": 32324,
8
+ "โŸง": 32318,
9
+ "๐€€": 32167,
10
+ "๐€": 32157,
11
+ "๐€‚": 32108,
12
+ "๐€ƒ": 32325,
13
+ "๐€„": 32303,
14
+ "๐€…": 32118,
15
+ "๐€†": 32208,
16
+ "๐€‡": 32189,
17
+ "๐€ˆ": 32205,
18
+ "๐€‰": 32232,
19
+ "๐€Š": 32261,
20
+ "๐€‹": 32184,
21
+ "๐€": 32228,
22
+ "๐€Ž": 32120,
23
+ "๐€": 32143,
24
+ "๐€": 32281,
25
+ "๐€‘": 32147,
26
+ "๐€’": 32100,
27
+ "๐€“": 32136,
28
+ "๐€”": 32198,
29
+ "๐€•": 32229,
30
+ "๐€–": 32289,
31
+ "๐€—": 32191,
32
+ "๐€˜": 32110,
33
+ "๐€™": 32207,
34
+ "๐€š": 32115,
35
+ "๐€›": 32145,
36
+ "๐€œ": 32168,
37
+ "๐€": 32225,
38
+ "๐€ž": 32153,
39
+ "๐€Ÿ": 32197,
40
+ "๐€ ": 32130,
41
+ "๐€ก": 32252,
42
+ "๐€ข": 32304,
43
+ "๐€ฃ": 32174,
44
+ "๐€ค": 32306,
45
+ "๐€ฅ": 32339,
46
+ "๐€ฆ": 32260,
47
+ "๐€จ": 32309,
48
+ "๐€ฉ": 32279,
49
+ "๐€ช": 32152,
50
+ "๐€ซ": 32156,
51
+ "๐€ฌ": 32139,
52
+ "๐€ญ": 32298,
53
+ "๐€ฎ": 32213,
54
+ "๐€ฏ": 32282,
55
+ "๐€ฐ": 32200,
56
+ "๐€ฑ": 32163,
57
+ "๐€ฒ": 32230,
58
+ "๐€ณ": 32148,
59
+ "๐€ด": 32322,
60
+ "๐€ต": 32162,
61
+ "๐€ถ": 32248,
62
+ "๐€ท": 32268,
63
+ "๐€ธ": 32193,
64
+ "๐€น": 32243,
65
+ "๐€บ": 32178,
66
+ "๐€ผ": 32135,
67
+ "๐€ฝ": 32175,
68
+ "๐€ฟ": 32171,
69
+ "๐€": 32331,
70
+ "๐": 32238,
71
+ "๐‚": 32342,
72
+ "๐ƒ": 32236,
73
+ "๐„": 32276,
74
+ "๐…": 32315,
75
+ "๐†": 32314,
76
+ "๐‡": 32180,
77
+ "๐ˆ": 32181,
78
+ "๐‰": 32330,
79
+ "๐Š": 32131,
80
+ "๐‹": 32253,
81
+ "๐Œ": 32101,
82
+ "๐": 32338,
83
+ "๐": 32285,
84
+ "๐‘": 32126,
85
+ "๐’": 32169,
86
+ "๐“": 32299,
87
+ "๐”": 32291,
88
+ "๐•": 32335,
89
+ "๐–": 32182,
90
+ "๐—": 32294,
91
+ "๐˜": 32102,
92
+ "๐™": 32329,
93
+ "๐š": 32122,
94
+ "๐›": 32125,
95
+ "๐œ": 32222,
96
+ "๐": 32114,
97
+ "๐‚€": 32256,
98
+ "๐‚": 32295,
99
+ "๐‚‚": 32202,
100
+ "๐‚ƒ": 32333,
101
+ "๐‚„": 32111,
102
+ "๐‚…": 32186,
103
+ "๐‚†": 32241,
104
+ "๐‚‡": 32337,
105
+ "๐‚ˆ": 32164,
106
+ "๐‚‰": 32280,
107
+ "๐‚Š": 32328,
108
+ "๐‚‹": 32209,
109
+ "๐‚Œ": 32106,
110
+ "๐‚": 32344,
111
+ "๐‚Ž": 32211,
112
+ "๐‚": 32275,
113
+ "๐‚": 32127,
114
+ "๐‚‘": 32223,
115
+ "๐‚’": 32220,
116
+ "๐‚”": 32340,
117
+ "๐‚•": 32292,
118
+ "๐‚–": 32204,
119
+ "๐‚—": 32321,
120
+ "๐‚š": 32250,
121
+ "๐‚›": 32196,
122
+ "๐‚œ": 32194,
123
+ "๐‚": 32116,
124
+ "๐‚ž": 32244,
125
+ "๐‚Ÿ": 32283,
126
+ "๐‚ก": 32121,
127
+ "๐‚ข": 32296,
128
+ "๐‚ฃ": 32264,
129
+ "๐‚ฅ": 32226,
130
+ "๐‚ฆ": 32190,
131
+ "๐‚ง": 32124,
132
+ "๐‚จ": 32231,
133
+ "๐‚ฉ": 32219,
134
+ "๐‚ช": 32286,
135
+ "๐‚ซ": 32123,
136
+ "๐‚ฌ": 32199,
137
+ "๐‚ญ": 32251,
138
+ "๐‚ฎ": 32170,
139
+ "๐‚ฏ": 32287,
140
+ "๐‚ฐ": 32334,
141
+ "๐‚ฑ": 32265,
142
+ "๐‚ฒ": 32262,
143
+ "๐‚ณ": 32218,
144
+ "๐‚ด": 32224,
145
+ "๐‚ต": 32247,
146
+ "๐‚ถ": 32183,
147
+ "๐‚ท": 32326,
148
+ "๐‚ธ": 32266,
149
+ "๐‚น": 32137,
150
+ "๐‚บ": 32160,
151
+ "๐‚ป": 32254,
152
+ "๐‚ผ": 32177,
153
+ "๐‚ฝ": 32332,
154
+ "๐‚พ": 32341,
155
+ "๐‚ฟ": 32277,
156
+ "๐ƒ€": 32142,
157
+ "๐ƒ": 32242,
158
+ "๐ƒ‚": 32323,
159
+ "๐ƒ„": 32192,
160
+ "๐ƒ†": 32217,
161
+ "๐ƒ‡": 32316,
162
+ "๐ƒˆ": 32270,
163
+ "๐ƒ‰": 32109,
164
+ "๐ƒŠ": 32146,
165
+ "๐ƒŒ": 32119,
166
+ "๐ƒ": 32144,
167
+ "๐ƒŽ": 32255,
168
+ "๐ƒ": 32150,
169
+ "๐ƒ": 32206,
170
+ "๐ƒ‘": 32307,
171
+ "๐ƒ“": 32188,
172
+ "๐ƒ”": 32273,
173
+ "๐ƒ•": 32271,
174
+ "๐ƒ—": 32210,
175
+ "๐ƒ™": 32320,
176
+ "๐ƒš": 32133,
177
+ "๐ƒœ": 32311,
178
+ "๐ƒ": 32212,
179
+ "๐ƒž": 32305,
180
+ "๐ƒŸ": 32134,
181
+ "๐ƒ ": 32308,
182
+ "๐ƒก": 32176,
183
+ "๐ƒข": 32105,
184
+ "๐ƒฃ": 32233,
185
+ "๐ƒค": 32310,
186
+ "๐ƒฅ": 32151,
187
+ "๐ƒฆ": 32221,
188
+ "๐ƒง": 32216,
189
+ "๐ƒจ": 32132,
190
+ "๐ƒฉ": 32267,
191
+ "๐ƒช": 32104,
192
+ "๐ƒซ": 32179,
193
+ "๐ƒฌ": 32290,
194
+ "๐ƒญ": 32272,
195
+ "๐ƒฎ": 32214,
196
+ "๐ƒฏ": 32155,
197
+ "๐ƒฐ": 32138,
198
+ "๐ƒฑ": 32278,
199
+ "๐ƒฒ": 32239,
200
+ "๐ƒณ": 32235,
201
+ "๐ƒด": 32107,
202
+ "๐ƒต": 32215,
203
+ "๐ƒถ": 32185,
204
+ "๐ƒท": 32259,
205
+ "๐ƒธ": 32245,
206
+ "๐ƒน": 32161,
207
+ "๐„ˆ": 32128,
208
+ "๐„‰": 32312,
209
+ "๐„Š": 32240,
210
+ "๐„‹": 32263,
211
+ "๐„Œ": 32149,
212
+ "๐„": 32154,
213
+ "๐„Ž": 32313,
214
+ "๐„": 32288,
215
+ "๐„": 32297,
216
+ "๐„‘": 32113,
217
+ "๐„’": 32343,
218
+ "๐„“": 32301,
219
+ "๐„”": 32327,
220
+ "๐„•": 32257,
221
+ "๐„–": 32140,
222
+ "๐„—": 32195,
223
+ "๐„˜": 32293,
224
+ "๐„™": 32141,
225
+ "๐„š": 32284,
226
+ "๐„›": 32112,
227
+ "๐„œ": 32269,
228
+ "๐„": 32237,
229
+ "๐„ž": 32165,
230
+ "๐„Ÿ": 32103,
231
+ "๐„ก": 32158,
232
+ "๐„ข": 32317,
233
+ "๐„ฃ": 32258,
234
+ "๐„ค": 32249,
235
+ "๐„ฅ": 32159,
236
+ "๐„ง": 32246,
237
+ "๐„ช": 32187,
238
+ "๐„ซ": 32117,
239
+ "๐„ท": 32336,
240
+ "๐„ธ": 32129,
241
+ "๐„น": 32173,
242
+ "๐„บ": 32172,
243
+ "๐„ผ": 32166,
244
+ "๐„ฝ": 32319,
245
+ "๐„พ": 32234,
246
+ "๐„ฟ": 32300
 
247
  }
config.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "_name_or_path": "/Users/lee/GitHub/results/GMY-T5/train_1/checkpoint-22831",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
6
  "classifier_dropout": 0.0,
7
- "d_ff": 3072,
8
  "d_kv": 64,
9
- "d_model": 768,
10
  "decoder_start_token_id": 0,
11
  "dense_act_fn": "relu",
12
  "dropout_rate": 0.1,
@@ -18,9 +18,9 @@
18
  "layer_norm_epsilon": 1e-06,
19
  "model_type": "t5",
20
  "n_positions": 512,
21
- "num_decoder_layers": 12,
22
- "num_heads": 12,
23
- "num_layers": 12,
24
  "output_past": true,
25
  "pad_token_id": 0,
26
  "relative_attention_max_distance": 128,
@@ -55,7 +55,7 @@
55
  }
56
  },
57
  "torch_dtype": "float32",
58
- "transformers_version": "4.44.0.dev0",
59
  "use_cache": true,
60
- "vocab_size": 32346
61
  }
 
1
  {
2
+ "_name_or_path": "/Users/lee/GitHub/results/GMY-T5Small/train_1/checkpoint-49380",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
6
  "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
  "d_kv": 64,
9
+ "d_model": 512,
10
  "decoder_start_token_id": 0,
11
  "dense_act_fn": "relu",
12
  "dropout_rate": 0.1,
 
18
  "layer_norm_epsilon": 1e-06,
19
  "model_type": "t5",
20
  "n_positions": 512,
21
+ "num_decoder_layers": 6,
22
+ "num_heads": 8,
23
+ "num_layers": 6,
24
  "output_past": true,
25
  "pad_token_id": 0,
26
  "relative_attention_max_distance": 128,
 
55
  }
56
  },
57
  "torch_dtype": "float32",
58
+ "transformers_version": "4.49.0",
59
  "use_cache": true,
60
+ "vocab_size": 32345
61
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "decoder_start_token_id": 0,
3
  "eos_token_id": 1,
4
  "pad_token_id": 0,
5
- "transformers_version": "4.44.0.dev0"
6
  }
 
2
  "decoder_start_token_id": 0,
3
  "eos_token_id": 1,
4
  "pad_token_id": 0,
5
+ "transformers_version": "4.49.0"
6
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:775f88e543813d588848756191faf45933a549fc9179a682de772be4a21afae7
3
- size 892314408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc49ee8d282b28a209e6c48734f46133d2664927eb6eb187d3e5bfa786550dc0
3
+ size 242486312
runs/Mar09_13-24-40_Lees-MacBook-Pro.local/events.out.tfevents.1741548280.Lees-MacBook-Pro.local ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b86133789875cbce6b9633392aba80b29fe224ac8c7d89c17a1e25c7604e343
3
+ size 6262
runs/Mar09_19-01-49_Lees-MacBook-Pro.local/events.out.tfevents.1741568509.Lees-MacBook-Pro.local ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d4b44c04d7c7fae11275c41d7cf84b87965eebe191cb78c8f35478e3fb75b53
3
+ size 6262
runs/Mar09_19-02-29_Lees-MacBook-Pro.local/events.out.tfevents.1741568549.Lees-MacBook-Pro.local ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9b65c708ff4b2315ce1c839aa25426e102082dc9c20cacaba1685903059d6ef
3
+ size 6263
runs/Mar09_19-03-06_Lees-MacBook-Pro.local/events.out.tfevents.1741568586.Lees-MacBook-Pro.local ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c9f061ba1d99575ee9ec9385f8fa628258a782a9752eddd1db348913ab6caf4
3
+ size 6264
runs/Mar09_19-04-19_Lees-MacBook-Pro.local/events.out.tfevents.1741568659.Lees-MacBook-Pro.local ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dda064cfaf786c37abb3024df2eb8a873c506190c0a50ce369efcfdb1215a90d
3
+ size 15485
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4d3f7b0755307dea535dfe79c9df3f0810c6792c1b19e60aa79c12b0513d5e4
3
- size 2466441
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0ee2673679b2f8cbac9094d2e3b35d97ea4cf42460e82e5661e18524da90979
3
+ size 2466261
tokenizer_config.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f955f64689f3aefc3b7074ccae4f46f2b231d2cc9b544465bbfaf2c376e3b689
3
- size 62605
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82924e48da7a95af97c7825f1cd28e32a713f5ab8859c618b36316b16dfcc9ba
3
+ size 62465
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d8448f9ced35d49c60331bf55d861a0d6b81145453f8a78f002ad520788632a
3
- size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:473dd09a7604068f6cd76496c3cdc2813a700abf7689de0884f513ff3012b31e
3
+ size 5560