josecannete committed on
Commit
7f64672
1 Parent(s): 29594d2

adding model distilled on mlqa

Browse files
config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "josecannete/albert-base-spanish-4",
3
+ "architectures": [
4
+ "AlbertForQuestionAnswering"
5
+ ],
6
+ "attention_probs_dropout_prob": 0,
7
+ "bos_token_id": 2,
8
+ "classifier_dropout_prob": 0.1,
9
+ "down_scale_factor": 1,
10
+ "embedding_size": 128,
11
+ "eos_token_id": 3,
12
+ "gap_size": 0,
13
+ "hidden_act": "gelu",
14
+ "hidden_dropout_prob": 0,
15
+ "hidden_size": 768,
16
+ "initializer_range": 0.02,
17
+ "inner_group_num": 1,
18
+ "intermediate_size": 3072,
19
+ "layer_norm_eps": 1e-12,
20
+ "max_position_embeddings": 512,
21
+ "model_type": "albert",
22
+ "net_structure_type": 0,
23
+ "num_attention_heads": 12,
24
+ "num_hidden_groups": 1,
25
+ "num_hidden_layers": 4,
26
+ "num_memory_blocks": 0,
27
+ "pad_token_id": 0,
28
+ "position_embedding_type": "absolute",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.25.1",
31
+ "type_vocab_size": 2,
32
+ "vocab_size": 31000
33
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dbe5b68e2c45dc205b56ca4e70c99d04bd318b5b787c4f3171e156c080b8051
3
+ size 44903231
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "<pad>",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "<unk>"
9
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "eos_token": "[SEP]",
6
+ "keep_accents": true,
7
+ "mask_token": {
8
+ "__type": "AddedToken",
9
+ "content": "[MASK]",
10
+ "lstrip": true,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ },
15
+ "model_max_length": 512,
16
+ "name_or_path": "josecannete/albert-base-spanish-4",
17
+ "pad_token": "<pad>",
18
+ "remove_space": true,
19
+ "sep_token": "[SEP]",
20
+ "special_tokens_map_file": null,
21
+ "tokenizer_class": "AlbertTokenizer",
22
+ "unk_token": "<unk>"
23
+ }
train_metrics.json ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "kd_train_loss": 0.9267110703099266,
4
+ "ce_train_loss": 2.2553009721142003,
5
+ "epoch": 1
6
+ },
7
+ {
8
+ "kd_train_loss": 0.437324966422325,
9
+ "ce_train_loss": 1.75392818311556,
10
+ "epoch": 2
11
+ },
12
+ {
13
+ "kd_train_loss": 0.25564840695271246,
14
+ "ce_train_loss": 1.549681672491046,
15
+ "epoch": 3
16
+ },
17
+ {
18
+ "kd_train_loss": 0.19114146709720883,
19
+ "ce_train_loss": 1.467941019872809,
20
+ "epoch": 4
21
+ },
22
+ {
23
+ "kd_train_loss": 0.16233224118606057,
24
+ "ce_train_loss": 1.43373512829177,
25
+ "epoch": 5
26
+ },
27
+ {
28
+ "kd_train_loss": 0.1377103858840066,
29
+ "ce_train_loss": 1.4005313754917956,
30
+ "epoch": 6
31
+ },
32
+ {
33
+ "kd_train_loss": 0.12142761922582973,
34
+ "ce_train_loss": 1.3796487985090269,
35
+ "epoch": 7
36
+ },
37
+ {
38
+ "kd_train_loss": 0.10911479878325715,
39
+ "ce_train_loss": 1.364345971253916,
40
+ "epoch": 8
41
+ },
42
+ {
43
+ "kd_train_loss": 0.10162057526938077,
44
+ "ce_train_loss": 1.35470530912222,
45
+ "epoch": 9
46
+ },
47
+ {
48
+ "kd_train_loss": 0.09173406713692267,
49
+ "ce_train_loss": 1.34008420633322,
50
+ "epoch": 10
51
+ },
52
+ {
53
+ "kd_train_loss": 0.08481045791462632,
54
+ "ce_train_loss": 1.331363442719029,
55
+ "epoch": 11
56
+ },
57
+ {
58
+ "kd_train_loss": 0.07994053070887122,
59
+ "ce_train_loss": 1.3250762583195745,
60
+ "epoch": 12
61
+ },
62
+ {
63
+ "kd_train_loss": 0.07311130602374406,
64
+ "ce_train_loss": 1.3166886057736626,
65
+ "epoch": 13
66
+ },
67
+ {
68
+ "kd_train_loss": 0.06744800969400819,
69
+ "ce_train_loss": 1.3090809386685223,
70
+ "epoch": 14
71
+ },
72
+ {
73
+ "kd_train_loss": 0.0634845183372486,
74
+ "ce_train_loss": 1.3027143190358639,
75
+ "epoch": 15
76
+ },
77
+ {
78
+ "kd_train_loss": 0.059948685925253556,
79
+ "ce_train_loss": 1.2982534907311085,
80
+ "epoch": 16
81
+ },
82
+ {
83
+ "kd_train_loss": 0.05605776371058318,
84
+ "ce_train_loss": 1.294342599036818,
85
+ "epoch": 17
86
+ },
87
+ {
88
+ "kd_train_loss": 0.05173261831380247,
89
+ "ce_train_loss": 1.2884771879167178,
90
+ "epoch": 18
91
+ },
92
+ {
93
+ "kd_train_loss": 0.0496412553909414,
94
+ "ce_train_loss": 1.2861254281558194,
95
+ "epoch": 19
96
+ },
97
+ {
98
+ "kd_train_loss": 0.04744180476558645,
99
+ "ce_train_loss": 1.282894245670442,
100
+ "epoch": 20
101
+ },
102
+ {
103
+ "kd_train_loss": 0.04444942316904211,
104
+ "ce_train_loss": 1.28070035802138,
105
+ "epoch": 21
106
+ },
107
+ {
108
+ "kd_train_loss": 0.041973895603821365,
109
+ "ce_train_loss": 1.27674570335881,
110
+ "epoch": 22
111
+ },
112
+ {
113
+ "kd_train_loss": 0.040531559972875025,
114
+ "ce_train_loss": 1.275717901502934,
115
+ "epoch": 23
116
+ },
117
+ {
118
+ "kd_train_loss": 0.038666574527971904,
119
+ "ce_train_loss": 1.272625279166414,
120
+ "epoch": 24
121
+ },
122
+ {
123
+ "kd_train_loss": 0.036579677870675675,
124
+ "ce_train_loss": 1.2708269421908416,
125
+ "epoch": 25
126
+ },
127
+ {
128
+ "kd_train_loss": 0.03472885877018851,
129
+ "ce_train_loss": 1.267079499905126,
130
+ "epoch": 26
131
+ },
132
+ {
133
+ "kd_train_loss": 0.03300567091074616,
134
+ "ce_train_loss": 1.265107855630497,
135
+ "epoch": 27
136
+ },
137
+ {
138
+ "kd_train_loss": 0.031267707465856755,
139
+ "ce_train_loss": 1.263978869895961,
140
+ "epoch": 28
141
+ },
142
+ {
143
+ "kd_train_loss": 0.02948023329343028,
144
+ "ce_train_loss": 1.2610492245907534,
145
+ "epoch": 29
146
+ },
147
+ {
148
+ "kd_train_loss": 0.028111331475519707,
149
+ "ce_train_loss": 1.2602382712617852,
150
+ "epoch": 30
151
+ },
152
+ {
153
+ "kd_train_loss": 0.026608478986394655,
154
+ "ce_train_loss": 1.2580400243320227,
155
+ "epoch": 31
156
+ },
157
+ {
158
+ "kd_train_loss": 0.0248751914831327,
159
+ "ce_train_loss": 1.2560983723202257,
160
+ "epoch": 32
161
+ },
162
+ {
163
+ "kd_train_loss": 0.023646128705622146,
164
+ "ce_train_loss": 1.2534775997824903,
165
+ "epoch": 33
166
+ },
167
+ {
168
+ "kd_train_loss": 0.02271544611893031,
169
+ "ce_train_loss": 1.2530935516091686,
170
+ "epoch": 34
171
+ },
172
+ {
173
+ "kd_train_loss": 0.021936021171748313,
174
+ "ce_train_loss": 1.2515547741509376,
175
+ "epoch": 35
176
+ },
177
+ {
178
+ "kd_train_loss": 0.021128209527009924,
179
+ "ce_train_loss": 1.2513876314494912,
180
+ "epoch": 36
181
+ },
182
+ {
183
+ "kd_train_loss": 0.019934771938535472,
184
+ "ce_train_loss": 1.2498082426243802,
185
+ "epoch": 37
186
+ },
187
+ {
188
+ "kd_train_loss": 0.018994240464449558,
189
+ "ce_train_loss": 1.2493383634257669,
190
+ "epoch": 38
191
+ }
192
+ ]
validation_metrics.json ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "exact_match": 37.2,
4
+ "f1": 59.57842060282067,
5
+ "validation_loss": 2.41743141412735,
6
+ "epoch": 1
7
+ },
8
+ {
9
+ "exact_match": 35.0,
10
+ "f1": 59.76357784672174,
11
+ "validation_loss": 2.4413123428821564,
12
+ "epoch": 2
13
+ },
14
+ {
15
+ "exact_match": 37.2,
16
+ "f1": 60.33021590001654,
17
+ "validation_loss": 2.419440597295761,
18
+ "epoch": 3
19
+ },
20
+ {
21
+ "exact_match": 37.0,
22
+ "f1": 60.876613790878174,
23
+ "validation_loss": 2.393815279006958,
24
+ "epoch": 4
25
+ },
26
+ {
27
+ "exact_match": 38.6,
28
+ "f1": 62.11650390602729,
29
+ "validation_loss": 2.279040962457657,
30
+ "epoch": 5
31
+ },
32
+ {
33
+ "exact_match": 36.2,
34
+ "f1": 60.24461119706418,
35
+ "validation_loss": 2.313094735145569,
36
+ "epoch": 6
37
+ },
38
+ {
39
+ "exact_match": 39.4,
40
+ "f1": 62.563444650375324,
41
+ "validation_loss": 2.3069529682397842,
42
+ "epoch": 7
43
+ },
44
+ {
45
+ "exact_match": 38.0,
46
+ "f1": 61.676504733167725,
47
+ "validation_loss": 2.2878972589969635,
48
+ "epoch": 8
49
+ },
50
+ {
51
+ "exact_match": 39.8,
52
+ "f1": 63.639278114254296,
53
+ "validation_loss": 2.2642438858747482,
54
+ "epoch": 9
55
+ },
56
+ {
57
+ "exact_match": 38.6,
58
+ "f1": 62.84171569640027,
59
+ "validation_loss": 2.236086845397949,
60
+ "epoch": 10
61
+ },
62
+ {
63
+ "exact_match": 39.2,
64
+ "f1": 61.88612229562657,
65
+ "validation_loss": 2.2972750663757324,
66
+ "epoch": 11
67
+ },
68
+ {
69
+ "exact_match": 40.0,
70
+ "f1": 63.41957204484483,
71
+ "validation_loss": 2.2380547374486923,
72
+ "epoch": 12
73
+ },
74
+ {
75
+ "exact_match": 38.8,
76
+ "f1": 63.0855328431726,
77
+ "validation_loss": 2.2377340495586395,
78
+ "epoch": 13
79
+ },
80
+ {
81
+ "exact_match": 38.6,
82
+ "f1": 63.128146216898976,
83
+ "validation_loss": 2.2086049765348434,
84
+ "epoch": 14
85
+ },
86
+ {
87
+ "exact_match": 40.0,
88
+ "f1": 64.54242193361432,
89
+ "validation_loss": 2.2143527567386627,
90
+ "epoch": 15
91
+ },
92
+ {
93
+ "exact_match": 40.0,
94
+ "f1": 63.278631848762295,
95
+ "validation_loss": 2.2167883813381195,
96
+ "epoch": 16
97
+ },
98
+ {
99
+ "exact_match": 40.2,
100
+ "f1": 64.27562472854457,
101
+ "validation_loss": 2.2080784142017365,
102
+ "epoch": 17
103
+ },
104
+ {
105
+ "exact_match": 40.0,
106
+ "f1": 64.15211177156412,
107
+ "validation_loss": 2.23178893327713,
108
+ "epoch": 18
109
+ },
110
+ {
111
+ "exact_match": 39.6,
112
+ "f1": 63.7482440053434,
113
+ "validation_loss": 2.216603860259056,
114
+ "epoch": 19
115
+ },
116
+ {
117
+ "exact_match": 39.0,
118
+ "f1": 62.873221029732164,
119
+ "validation_loss": 2.2428862750530243,
120
+ "epoch": 20
121
+ },
122
+ {
123
+ "exact_match": 38.6,
124
+ "f1": 63.0879279178262,
125
+ "validation_loss": 2.206653445959091,
126
+ "epoch": 21
127
+ },
128
+ {
129
+ "exact_match": 41.0,
130
+ "f1": 64.54332732301185,
131
+ "validation_loss": 2.2194953709840775,
132
+ "epoch": 22
133
+ },
134
+ {
135
+ "exact_match": 40.6,
136
+ "f1": 63.80289079581061,
137
+ "validation_loss": 2.203217476606369,
138
+ "epoch": 23
139
+ },
140
+ {
141
+ "exact_match": 40.0,
142
+ "f1": 63.404288028303924,
143
+ "validation_loss": 2.215685695409775,
144
+ "epoch": 24
145
+ },
146
+ {
147
+ "exact_match": 38.8,
148
+ "f1": 62.93220915086903,
149
+ "validation_loss": 2.2293421030044556,
150
+ "epoch": 25
151
+ },
152
+ {
153
+ "exact_match": 39.6,
154
+ "f1": 63.04610033996446,
155
+ "validation_loss": 2.1999125480651855,
156
+ "epoch": 26
157
+ },
158
+ {
159
+ "exact_match": 38.8,
160
+ "f1": 63.37994688557569,
161
+ "validation_loss": 2.19596266746521,
162
+ "epoch": 27
163
+ },
164
+ {
165
+ "exact_match": 39.8,
166
+ "f1": 63.36716124332587,
167
+ "validation_loss": 2.207815259695053,
168
+ "epoch": 28
169
+ },
170
+ {
171
+ "exact_match": 39.0,
172
+ "f1": 63.20235070703525,
173
+ "validation_loss": 2.2127343714237213,
174
+ "epoch": 29
175
+ },
176
+ {
177
+ "exact_match": 39.8,
178
+ "f1": 64.53722000128786,
179
+ "validation_loss": 2.2037834227085114,
180
+ "epoch": 30
181
+ },
182
+ {
183
+ "exact_match": 39.0,
184
+ "f1": 63.89482166971992,
185
+ "validation_loss": 2.202571988105774,
186
+ "epoch": 31
187
+ },
188
+ {
189
+ "exact_match": 39.6,
190
+ "f1": 63.50426623795006,
191
+ "validation_loss": 2.2169955670833588,
192
+ "epoch": 32
193
+ },
194
+ {
195
+ "exact_match": 39.2,
196
+ "f1": 63.413569332959725,
197
+ "validation_loss": 2.2177259027957916,
198
+ "epoch": 33
199
+ },
200
+ {
201
+ "exact_match": 40.2,
202
+ "f1": 64.15148141858079,
203
+ "validation_loss": 2.2170828878879547,
204
+ "epoch": 34
205
+ },
206
+ {
207
+ "exact_match": 39.2,
208
+ "f1": 63.379632141043025,
209
+ "validation_loss": 2.221564292907715,
210
+ "epoch": 35
211
+ },
212
+ {
213
+ "exact_match": 38.6,
214
+ "f1": 63.822407502125486,
215
+ "validation_loss": 2.2223614752292633,
216
+ "epoch": 36
217
+ },
218
+ {
219
+ "exact_match": 38.4,
220
+ "f1": 63.25330786310625,
221
+ "validation_loss": 2.2186082154512405,
222
+ "epoch": 37
223
+ },
224
+ {
225
+ "exact_match": 39.4,
226
+ "f1": 63.81577942649743,
227
+ "validation_loss": 2.2130299359560013,
228
+ "epoch": 38
229
+ }
230
+ ]