josecannete committed
Commit 0bb4513
1 Parent(s): f9bed8d

adding model distilled on sqac

config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "CenIA/albert-base-spanish",
+   "architectures": [
+     "AlbertForQuestionAnswering"
+   ],
+   "attention_probs_dropout_prob": 0,
+   "bos_token_id": 2,
+   "classifier_dropout_prob": 0.1,
+   "down_scale_factor": 1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "gap_size": 0,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "inner_group_num": 1,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "albert",
+   "net_structure_type": 0,
+   "num_attention_heads": 12,
+   "num_hidden_groups": 1,
+   "num_hidden_layers": 12,
+   "num_memory_blocks": 0,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.25.1",
+   "type_vocab_size": 2,
+   "vocab_size": 31000
+ }
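For reference, a minimal usage sketch for the question-answering checkpoint this commit adds. The repo path below is a placeholder for wherever this model is published; `CenIA/albert-base-spanish` in `_name_or_path` is only the base checkpoint the student was initialized from.

```python
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

repo_id = "path/to/this-repo"  # placeholder: local clone or Hub id of this model

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForQuestionAnswering.from_pretrained(repo_id)

qa = pipeline("question-answering", model=model, tokenizer=tokenizer)
result = qa(
    question="¿Sobre qué corpus fue destilado el modelo?",
    context="El modelo fue destilado sobre el corpus SQAC.",
)
print(result)  # {'score': ..., 'start': ..., 'end': ..., 'answer': ...}
```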
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e4fa7a7b02ede1f65475e02cd558ee570d718829ddab26c9d071b2866619b522
+ size 44903231
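The file above is a Git LFS pointer, not the weights themselves; the `oid` is the SHA-256 of the actual 44,903,231-byte binary. A small sketch for checking a downloaded copy against the pointer (the local path is an assumption):

```python
import hashlib
import os

EXPECTED_OID = "e4fa7a7b02ede1f65475e02cd558ee570d718829ddab26c9d071b2866619b522"
EXPECTED_SIZE = 44903231  # bytes, from the pointer above

path = "pytorch_model.bin"  # local path after `git lfs pull` (assumed)

with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()

assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"
assert digest == EXPECTED_OID, "sha256 mismatch"
print("pytorch_model.bin matches its LFS pointer")
```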
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "bos_token": "[CLS]",
+   "cls_token": "[CLS]",
+   "eos_token": "[SEP]",
+   "mask_token": "[MASK]",
+   "pad_token": "<pad>",
+   "sep_token": "[SEP]",
+   "unk_token": "<unk>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": "[CLS]",
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "eos_token": "[SEP]",
+   "keep_accents": true,
+   "mask_token": {
+     "__type": "AddedToken",
+     "content": "[MASK]",
+     "lstrip": true,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "model_max_length": 512,
+   "name_or_path": "CenIA/albert-base-spanish",
+   "pad_token": "<pad>",
+   "remove_space": true,
+   "sep_token": "[SEP]",
+   "special_tokens_map_file": null,
+   "tokenizer_class": "AlbertTokenizer",
+   "unk_token": "<unk>"
+ }
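A short sketch of loading the tokenizer configured above and inspecting the special tokens defined in special_tokens_map.json (the local path is a placeholder):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/to/this-repo")  # placeholder path

# Special tokens from special_tokens_map.json
print(tok.cls_token, tok.sep_token, tok.pad_token, tok.unk_token, tok.mask_token)

# Question/context pairs are packed as [CLS] question [SEP] context [SEP]
enc = tok("¿Quién escribió la novela?", "La novela fue escrita por Isabel Allende.")
print(tok.convert_ids_to_tokens(enc["input_ids"]))
```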
train_metrics.json ADDED
@@ -0,0 +1,232 @@
+ [
+   {
+     "kd_train_loss": 1.0947023844902686,
+     "ce_train_loss": 1.5232174232573408,
+     "epoch": 1
+   },
+   {
+     "kd_train_loss": 0.4552461179098929,
+     "ce_train_loss": 0.8548114631174272,
+     "epoch": 2
+   },
+   {
+     "kd_train_loss": 0.2430057234852171,
+     "ce_train_loss": 0.6025659281720976,
+     "epoch": 3
+   },
+   {
+     "kd_train_loss": 0.16731666354193336,
+     "ce_train_loss": 0.4961035010406679,
+     "epoch": 4
+   },
+   {
+     "kd_train_loss": 0.16175333787696095,
+     "ce_train_loss": 0.4779861364980194,
+     "epoch": 5
+   },
+   {
+     "kd_train_loss": 0.13541899809904526,
+     "ce_train_loss": 0.4445823802826262,
+     "epoch": 6
+   },
+   {
+     "kd_train_loss": 0.09306218622966539,
+     "ce_train_loss": 0.38481191638767404,
+     "epoch": 7
+   },
+   {
+     "kd_train_loss": 0.08379529779310373,
+     "ce_train_loss": 0.37093287068804964,
+     "epoch": 8
+   },
+   {
+     "kd_train_loss": 0.09277742982147574,
+     "ce_train_loss": 0.3782876192794858,
+     "epoch": 9
+   },
+   {
+     "kd_train_loss": 0.07745065423794564,
+     "ce_train_loss": 0.3587679031873858,
+     "epoch": 10
+   },
+   {
+     "kd_train_loss": 0.061601993983148494,
+     "ce_train_loss": 0.33865281786260004,
+     "epoch": 11
+   },
+   {
+     "kd_train_loss": 0.061879280213489124,
+     "ce_train_loss": 0.33773889633071447,
+     "epoch": 12
+   },
+   {
+     "kd_train_loss": 0.06582513749814792,
+     "ce_train_loss": 0.3402851090150991,
+     "epoch": 13
+   },
+   {
+     "kd_train_loss": 0.05214671272316119,
+     "ce_train_loss": 0.32155171622071765,
+     "epoch": 14
+   },
+   {
+     "kd_train_loss": 0.04352427016663758,
+     "ce_train_loss": 0.3139693029099261,
+     "epoch": 15
+   },
+   {
+     "kd_train_loss": 0.047756410193107376,
+     "ce_train_loss": 0.3190439135798025,
+     "epoch": 16
+   },
+   {
+     "kd_train_loss": 0.04692017334389549,
+     "ce_train_loss": 0.3202453342917768,
+     "epoch": 17
+   },
+   {
+     "kd_train_loss": 0.03696878542872379,
+     "ce_train_loss": 0.30533730427093936,
+     "epoch": 18
+   },
+   {
+     "kd_train_loss": 0.03315722367991846,
+     "ce_train_loss": 0.3032886844580573,
+     "epoch": 19
+   },
+   {
+     "kd_train_loss": 0.0393941437929639,
+     "ce_train_loss": 0.3104962869251222,
+     "epoch": 20
+   },
+   {
+     "kd_train_loss": 0.03725780002317016,
+     "ce_train_loss": 0.308575238288705,
+     "epoch": 21
+   },
+   {
+     "kd_train_loss": 0.027123317775641333,
+     "ce_train_loss": 0.2964426196657609,
+     "epoch": 22
+   },
+   {
+     "kd_train_loss": 0.03259203548189647,
+     "ce_train_loss": 0.30378937095884584,
+     "epoch": 23
+   },
+   {
+     "kd_train_loss": 0.02507180978475867,
+     "ce_train_loss": 0.2968123564478692,
+     "epoch": 24
+   },
+   {
+     "kd_train_loss": 0.018539781134435638,
+     "ce_train_loss": 0.28946925138584506,
+     "epoch": 25
+   },
+   {
+     "kd_train_loss": 0.016799416104134223,
+     "ce_train_loss": 0.28864503644141215,
+     "epoch": 26
+   },
+   {
+     "kd_train_loss": 0.0196742503607721,
+     "ce_train_loss": 0.29058823078109924,
+     "epoch": 27
+   },
+   {
+     "kd_train_loss": 0.023319941088969967,
+     "ce_train_loss": 0.2963838350523069,
+     "epoch": 28
+   },
+   {
+     "kd_train_loss": 0.020008223797042132,
+     "ce_train_loss": 0.29038273367262757,
+     "epoch": 29
+   },
+   {
+     "kd_train_loss": 0.015205842962305498,
+     "ce_train_loss": 0.28679023208975907,
+     "epoch": 30
+   },
+   {
+     "kd_train_loss": 0.014996169876084736,
+     "ce_train_loss": 0.2855800387695403,
+     "epoch": 31
+   },
+   {
+     "kd_train_loss": 0.01633226615242186,
+     "ce_train_loss": 0.28839117782423723,
+     "epoch": 32
+   },
+   {
+     "kd_train_loss": 0.015631119930364822,
+     "ce_train_loss": 0.28812513951757684,
+     "epoch": 33
+   },
+   {
+     "kd_train_loss": 0.013789654335479013,
+     "ce_train_loss": 0.28554097334773554,
+     "epoch": 34
+   },
+   {
+     "kd_train_loss": 0.012514014381140127,
+     "ce_train_loss": 0.2846834968007484,
+     "epoch": 35
+   },
+   {
+     "kd_train_loss": 0.011176710358869305,
+     "ce_train_loss": 0.28357245165115375,
+     "epoch": 36
+   },
+   {
+     "kd_train_loss": 0.01115612870038295,
+     "ce_train_loss": 0.28384113424818297,
+     "epoch": 37
+   },
+   {
+     "kd_train_loss": 0.011056883265093858,
+     "ce_train_loss": 0.2827804360775589,
+     "epoch": 38
+   },
+   {
+     "kd_train_loss": 0.009166364349659912,
+     "ce_train_loss": 0.28117091059218197,
+     "epoch": 39
+   },
+   {
+     "kd_train_loss": 0.00822960990880345,
+     "ce_train_loss": 0.2804953294531631,
+     "epoch": 40
+   },
+   {
+     "kd_train_loss": 0.0077868187860612665,
+     "ce_train_loss": 0.28032581792221994,
+     "epoch": 41
+   },
+   {
+     "kd_train_loss": 0.007924732372616929,
+     "ce_train_loss": 0.280493331260204,
+     "epoch": 42
+   },
+   {
+     "kd_train_loss": 0.007657609827370564,
+     "ce_train_loss": 0.2806634148062424,
+     "epoch": 43
+   },
+   {
+     "kd_train_loss": 0.006935315165598742,
+     "ce_train_loss": 0.27951953480978403,
+     "epoch": 44
+   },
+   {
+     "kd_train_loss": 0.00630589836790282,
+     "ce_train_loss": 0.2791120875716525,
+     "epoch": 45
+   },
+   {
+     "kd_train_loss": 0.00583886285222751,
+     "ce_train_loss": 0.2784836254511495,
+     "epoch": 46
+   }
+ ]
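train_metrics.json tracks two losses per epoch: a knowledge-distillation loss (`kd_train_loss`) and a hard-label cross-entropy loss (`ce_train_loss`). The training script is not part of this commit, so the sketch below only illustrates how such a pair of losses is typically combined for span-extraction QA distillation; the temperature `T`, weight `alpha`, and exact reductions are assumptions, not the recipe used here.

```python
import torch
import torch.nn.functional as F

def distillation_loss(student_start, student_end, teacher_start, teacher_end,
                      start_positions, end_positions, T=2.0, alpha=0.5):
    # Hard-label cross-entropy on the gold start/end indices (the "ce" term).
    ce = 0.5 * (F.cross_entropy(student_start, start_positions) +
                F.cross_entropy(student_end, end_positions))
    # Soft-label KL divergence against the teacher's start/end distributions
    # (the "kd" term), scaled by T^2 as is standard with temperature softening.
    kd = 0.5 * (F.kl_div(F.log_softmax(student_start / T, dim=-1),
                         F.softmax(teacher_start / T, dim=-1),
                         reduction="batchmean") +
                F.kl_div(F.log_softmax(student_end / T, dim=-1),
                         F.softmax(teacher_end / T, dim=-1),
                         reduction="batchmean")) * T * T
    # alpha is an illustrative mixing weight between the two terms.
    return alpha * kd + (1 - alpha) * ce, kd, ce
```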
validation_metrics.json ADDED
@@ -0,0 +1,278 @@
+ [
+   {
+     "exact_match": 55.5793991416309,
+     "f1": 74.34233087415208,
+     "validation_loss": 1.087331213113916,
+     "epoch": 1
+   },
+   {
+     "exact_match": 57.40343347639485,
+     "f1": 75.51273922475161,
+     "validation_loss": 1.0228341174034672,
+     "epoch": 2
+   },
+   {
+     "exact_match": 58.95922746781116,
+     "f1": 76.56956220870414,
+     "validation_loss": 1.0212595376349587,
+     "epoch": 3
+   },
+   {
+     "exact_match": 59.066523605150216,
+     "f1": 76.6711666766026,
+     "validation_loss": 1.0487165486312096,
+     "epoch": 4
+   },
+   {
+     "exact_match": 57.34978540772532,
+     "f1": 75.12240297455054,
+     "validation_loss": 1.0870489833009152,
+     "epoch": 5
+   },
+   {
+     "exact_match": 59.44206008583691,
+     "f1": 76.48396562860185,
+     "validation_loss": 0.9862570107438182,
+     "epoch": 6
+   },
+   {
+     "exact_match": 60.08583690987125,
+     "f1": 76.55882323176809,
+     "validation_loss": 0.9469387385681385,
+     "epoch": 7
+   },
+   {
+     "exact_match": 58.31545064377682,
+     "f1": 75.88445620337203,
+     "validation_loss": 1.0177030702128664,
+     "epoch": 8
+   },
+   {
+     "exact_match": 60.46137339055794,
+     "f1": 77.47096613095226,
+     "validation_loss": 0.9537355145652786,
+     "epoch": 9
+   },
+   {
+     "exact_match": 60.407725321888414,
+     "f1": 77.07440337017155,
+     "validation_loss": 0.9325784337884597,
+     "epoch": 10
+   },
+   {
+     "exact_match": 60.30042918454936,
+     "f1": 77.07169436276584,
+     "validation_loss": 0.9370737703701922,
+     "epoch": 11
+   },
+   {
+     "exact_match": 60.51502145922747,
+     "f1": 77.14843052463635,
+     "validation_loss": 0.9131858680994456,
+     "epoch": 12
+   },
+   {
+     "exact_match": 60.30042918454936,
+     "f1": 77.04750041104447,
+     "validation_loss": 0.9172795615123428,
+     "epoch": 13
+   },
+   {
+     "exact_match": 61.10515021459227,
+     "f1": 77.94904083786975,
+     "validation_loss": 0.8892349123044778,
+     "epoch": 14
+   },
+   {
+     "exact_match": 59.817596566523605,
+     "f1": 76.87952799083818,
+     "validation_loss": 0.935095418045539,
+     "epoch": 15
+   },
+   {
+     "exact_match": 60.836909871244636,
+     "f1": 76.95381778420939,
+     "validation_loss": 0.9057896752621382,
+     "epoch": 16
+   },
+   {
+     "exact_match": 60.836909871244636,
+     "f1": 77.10928836980781,
+     "validation_loss": 0.9057705810734333,
+     "epoch": 17
+   },
+   {
+     "exact_match": 60.89055793991416,
+     "f1": 77.77988704830621,
+     "validation_loss": 0.8974879779888474,
+     "epoch": 18
+   },
+   {
+     "exact_match": 61.85622317596567,
+     "f1": 78.00866198183222,
+     "validation_loss": 0.8770076184327366,
+     "epoch": 19
+   },
+   {
+     "exact_match": 60.1931330472103,
+     "f1": 77.63181857679082,
+     "validation_loss": 0.9052854824157162,
+     "epoch": 20
+   },
+   {
+     "exact_match": 61.1587982832618,
+     "f1": 78.30490987484033,
+     "validation_loss": 0.879315102145872,
+     "epoch": 21
+   },
+   {
+     "exact_match": 61.85622317596567,
+     "f1": 78.611114259606,
+     "validation_loss": 0.8659358330582845,
+     "epoch": 22
+   },
+   {
+     "exact_match": 61.31974248927039,
+     "f1": 78.26926128409802,
+     "validation_loss": 0.8797163142047766,
+     "epoch": 23
+   },
+   {
+     "exact_match": 61.587982832618025,
+     "f1": 78.45246344734926,
+     "validation_loss": 0.8673172258693753,
+     "epoch": 24
+   },
+   {
+     "exact_match": 61.74892703862661,
+     "f1": 78.42813496964585,
+     "validation_loss": 0.8609011983143464,
+     "epoch": 25
+   },
+   {
+     "exact_match": 61.69527896995708,
+     "f1": 78.40131461977644,
+     "validation_loss": 0.8753554505928782,
+     "epoch": 26
+   },
+   {
+     "exact_match": 62.392703862660944,
+     "f1": 78.66953321461574,
+     "validation_loss": 0.8748681242684372,
+     "epoch": 27
+   },
+   {
+     "exact_match": 62.44635193133047,
+     "f1": 78.51469925515028,
+     "validation_loss": 0.8763929468984822,
+     "epoch": 28
+   },
+   {
+     "exact_match": 62.392703862660944,
+     "f1": 78.7724119904101,
+     "validation_loss": 0.8719070598141838,
+     "epoch": 29
+   },
+   {
+     "exact_match": 63.358369098712444,
+     "f1": 79.40679790604415,
+     "validation_loss": 0.8574135223872789,
+     "epoch": 30
+   },
+   {
+     "exact_match": 62.23175965665236,
+     "f1": 78.59155522358435,
+     "validation_loss": 0.8651762600163467,
+     "epoch": 31
+   },
+   {
+     "exact_match": 61.85622317596567,
+     "f1": 78.49857899641856,
+     "validation_loss": 0.867353880905923,
+     "epoch": 32
+   },
+   {
+     "exact_match": 62.71459227467811,
+     "f1": 78.90852567045103,
+     "validation_loss": 0.8571221979747292,
+     "epoch": 33
+   },
+   {
+     "exact_match": 62.07081545064378,
+     "f1": 78.73070977943713,
+     "validation_loss": 0.8601729876667489,
+     "epoch": 34
+   },
+   {
+     "exact_match": 62.71459227467811,
+     "f1": 78.98855624442972,
+     "validation_loss": 0.8550719909995567,
+     "epoch": 35
+   },
+   {
+     "exact_match": 62.92918454935622,
+     "f1": 79.15738074387234,
+     "validation_loss": 0.8596407024914982,
+     "epoch": 36
+   },
+   {
+     "exact_match": 62.71459227467811,
+     "f1": 78.86830316563682,
+     "validation_loss": 0.8629121434597569,
+     "epoch": 37
+   },
+   {
+     "exact_match": 62.607296137339056,
+     "f1": 79.00873408726964,
+     "validation_loss": 0.8626481908878297,
+     "epoch": 38
+   },
+   {
+     "exact_match": 62.392703862660944,
+     "f1": 78.72134194996835,
+     "validation_loss": 0.868450756746394,
+     "epoch": 39
+   },
+   {
+     "exact_match": 62.607296137339056,
+     "f1": 79.01008356557179,
+     "validation_loss": 0.8624537068468924,
+     "epoch": 40
+   },
+   {
+     "exact_match": 62.607296137339056,
+     "f1": 78.95927550801754,
+     "validation_loss": 0.8672337875566409,
+     "epoch": 41
+   },
+   {
+     "exact_match": 62.8755364806867,
+     "f1": 79.13112096393785,
+     "validation_loss": 0.8608657327544598,
+     "epoch": 42
+   },
+   {
+     "exact_match": 63.25107296137339,
+     "f1": 79.39563189203274,
+     "validation_loss": 0.8645321310476493,
+     "epoch": 43
+   },
+   {
+     "exact_match": 63.03648068669528,
+     "f1": 79.21975487809372,
+     "validation_loss": 0.8642783073978569,
+     "epoch": 44
+   },
+   {
+     "exact_match": 62.82188841201717,
+     "f1": 79.17664808202917,
+     "validation_loss": 0.8628851865766612,
+     "epoch": 45
+   },
+   {
+     "exact_match": 62.76824034334764,
+     "f1": 78.9824787443219,
+     "validation_loss": 0.862493644120129,
+     "epoch": 46
+   }
+ ]
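A small sketch for picking the best checkpoint from the validation_metrics.json committed above; by F1 that is epoch 30 (F1 ≈ 79.41, exact match ≈ 63.36), assuming the file is read from a local checkout of this repo.

```python
import json

# Load the per-epoch validation metrics added in this commit.
with open("validation_metrics.json") as f:
    metrics = json.load(f)

# Select the epoch with the highest F1 score.
best = max(metrics, key=lambda m: m["f1"])
print(f"best epoch: {best['epoch']}  "
      f"f1: {best['f1']:.2f}  "
      f"exact_match: {best['exact_match']:.2f}")
```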