josecannete committed on
Commit
9fe3028
1 Parent(s): fead68b

adding model distilled on tar

Browse files
config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "josecannete/albert-base-spanish-6",
3
+ "architectures": [
4
+ "AlbertForQuestionAnswering"
5
+ ],
6
+ "attention_probs_dropout_prob": 0,
7
+ "bos_token_id": 2,
8
+ "classifier_dropout_prob": 0.1,
9
+ "down_scale_factor": 1,
10
+ "embedding_size": 128,
11
+ "eos_token_id": 3,
12
+ "gap_size": 0,
13
+ "hidden_act": "gelu",
14
+ "hidden_dropout_prob": 0,
15
+ "hidden_size": 768,
16
+ "initializer_range": 0.02,
17
+ "inner_group_num": 1,
18
+ "intermediate_size": 3072,
19
+ "layer_norm_eps": 1e-12,
20
+ "max_position_embeddings": 512,
21
+ "model_type": "albert",
22
+ "net_structure_type": 0,
23
+ "num_attention_heads": 12,
24
+ "num_hidden_groups": 1,
25
+ "num_hidden_layers": 6,
26
+ "num_memory_blocks": 0,
27
+ "pad_token_id": 0,
28
+ "position_embedding_type": "absolute",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.25.1",
31
+ "type_vocab_size": 2,
32
+ "vocab_size": 31000
33
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dcf4da4bfccdb30c54e6f2c1447a3692ef930e5bd2ba40e26fa2cfa866be816
3
+ size 44903231
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "<pad>",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "<unk>"
9
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "eos_token": "[SEP]",
6
+ "keep_accents": true,
7
+ "mask_token": {
8
+ "__type": "AddedToken",
9
+ "content": "[MASK]",
10
+ "lstrip": true,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ },
15
+ "model_max_length": 512,
16
+ "name_or_path": "josecannete/albert-base-spanish-6",
17
+ "pad_token": "<pad>",
18
+ "remove_space": true,
19
+ "sep_token": "[SEP]",
20
+ "special_tokens_map_file": null,
21
+ "tokenizer_class": "AlbertTokenizer",
22
+ "unk_token": "<unk>"
23
+ }
train_metrics.json ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "kd_train_loss": 0.6830137466867171,
4
+ "ce_train_loss": 1.7628213724558084,
5
+ "epoch": 1
6
+ },
7
+ {
8
+ "kd_train_loss": 0.312156816278328,
9
+ "ce_train_loss": 1.3656892305714694,
10
+ "epoch": 2
11
+ },
12
+ {
13
+ "kd_train_loss": 0.2025291430952338,
14
+ "ce_train_loss": 1.2286346414708065,
15
+ "epoch": 3
16
+ },
17
+ {
18
+ "kd_train_loss": 0.1675652732452662,
19
+ "ce_train_loss": 1.1781924959846626,
20
+ "epoch": 4
21
+ },
22
+ {
23
+ "kd_train_loss": 0.141320709960764,
24
+ "ce_train_loss": 1.1402555186406045,
25
+ "epoch": 5
26
+ },
27
+ {
28
+ "kd_train_loss": 0.12487025479499198,
29
+ "ce_train_loss": 1.1171601120514283,
30
+ "epoch": 6
31
+ },
32
+ {
33
+ "kd_train_loss": 0.11275445013108412,
34
+ "ce_train_loss": 1.0968023719815412,
35
+ "epoch": 7
36
+ },
37
+ {
38
+ "kd_train_loss": 0.10166890256519719,
39
+ "ce_train_loss": 1.0831254142840065,
40
+ "epoch": 8
41
+ },
42
+ {
43
+ "kd_train_loss": 0.09210073803200085,
44
+ "ce_train_loss": 1.0701948024150536,
45
+ "epoch": 9
46
+ },
47
+ {
48
+ "kd_train_loss": 0.08462942873219091,
49
+ "ce_train_loss": 1.0593493170343609,
50
+ "epoch": 10
51
+ },
52
+ {
53
+ "kd_train_loss": 0.07805590746184791,
54
+ "ce_train_loss": 1.0501136871593257,
55
+ "epoch": 11
56
+ },
57
+ {
58
+ "kd_train_loss": 0.0719236569506949,
59
+ "ce_train_loss": 1.042688865954755,
60
+ "epoch": 12
61
+ },
62
+ {
63
+ "kd_train_loss": 0.06575698776675148,
64
+ "ce_train_loss": 1.0340635537927358,
65
+ "epoch": 13
66
+ },
67
+ {
68
+ "kd_train_loss": 0.061664894964740175,
69
+ "ce_train_loss": 1.0281955557194584,
70
+ "epoch": 14
71
+ },
72
+ {
73
+ "kd_train_loss": 0.05768087919770237,
74
+ "ce_train_loss": 1.0229004654426275,
75
+ "epoch": 15
76
+ },
77
+ {
78
+ "kd_train_loss": 0.05289051104289443,
79
+ "ce_train_loss": 1.0158004854180018,
80
+ "epoch": 16
81
+ },
82
+ {
83
+ "kd_train_loss": 0.04982077079508324,
84
+ "ce_train_loss": 1.0126711427241923,
85
+ "epoch": 17
86
+ },
87
+ {
88
+ "kd_train_loss": 0.04692809136005168,
89
+ "ce_train_loss": 1.008497337627011,
90
+ "epoch": 18
91
+ },
92
+ {
93
+ "kd_train_loss": 0.04335454953382844,
94
+ "ce_train_loss": 1.0044087147706189,
95
+ "epoch": 19
96
+ },
97
+ {
98
+ "kd_train_loss": 0.0412886721984155,
99
+ "ce_train_loss": 1.0013153468914984,
100
+ "epoch": 20
101
+ },
102
+ {
103
+ "kd_train_loss": 0.03853050418126129,
104
+ "ce_train_loss": 0.9989704343575524,
105
+ "epoch": 21
106
+ },
107
+ {
108
+ "kd_train_loss": 0.03571276531279402,
109
+ "ce_train_loss": 0.9921581195113957,
110
+ "epoch": 22
111
+ },
112
+ {
113
+ "kd_train_loss": 0.03432205324191933,
114
+ "ce_train_loss": 0.9936106575913035,
115
+ "epoch": 23
116
+ },
117
+ {
118
+ "kd_train_loss": 0.03262581905357559,
119
+ "ce_train_loss": 0.9905654863745376,
120
+ "epoch": 24
121
+ },
122
+ {
123
+ "kd_train_loss": 0.030242365318093472,
124
+ "ce_train_loss": 0.9878983724749614,
125
+ "epoch": 25
126
+ },
127
+ {
128
+ "kd_train_loss": 0.02896331250090376,
129
+ "ce_train_loss": 0.9868850504997826,
130
+ "epoch": 26
131
+ },
132
+ {
133
+ "kd_train_loss": 0.02786781762482485,
134
+ "ce_train_loss": 0.9844534928580074,
135
+ "epoch": 27
136
+ },
137
+ {
138
+ "kd_train_loss": 0.025694916236668967,
139
+ "ce_train_loss": 0.9822443084987549,
140
+ "epoch": 28
141
+ },
142
+ {
143
+ "kd_train_loss": 0.024073054125132547,
144
+ "ce_train_loss": 0.9799835870529325,
145
+ "epoch": 29
146
+ },
147
+ {
148
+ "kd_train_loss": 0.023106643767783645,
149
+ "ce_train_loss": 0.9782071878581511,
150
+ "epoch": 30
151
+ },
152
+ {
153
+ "kd_train_loss": 0.021601043730523305,
154
+ "ce_train_loss": 0.9769454616516817,
155
+ "epoch": 31
156
+ },
157
+ {
158
+ "kd_train_loss": 0.020308788988034395,
159
+ "ce_train_loss": 0.9752770729724222,
160
+ "epoch": 32
161
+ },
162
+ {
163
+ "kd_train_loss": 0.019418448640961868,
164
+ "ce_train_loss": 0.974066774812746,
165
+ "epoch": 33
166
+ },
167
+ {
168
+ "kd_train_loss": 0.018437625705683823,
169
+ "ce_train_loss": 0.9732851413884304,
170
+ "epoch": 34
171
+ },
172
+ {
173
+ "kd_train_loss": 0.01717964943272248,
174
+ "ce_train_loss": 0.9717762321233749,
175
+ "epoch": 35
176
+ },
177
+ {
178
+ "kd_train_loss": 0.016244565310908024,
179
+ "ce_train_loss": 0.9705157853470117,
180
+ "epoch": 36
181
+ },
182
+ {
183
+ "kd_train_loss": 0.015381258914251909,
184
+ "ce_train_loss": 0.9697859634937616,
185
+ "epoch": 37
186
+ },
187
+ {
188
+ "kd_train_loss": 0.014527724452813829,
189
+ "ce_train_loss": 0.9681962279346882,
190
+ "epoch": 38
191
+ },
192
+ {
193
+ "kd_train_loss": 0.013673930659630776,
194
+ "ce_train_loss": 0.9671593079526104,
195
+ "epoch": 39
196
+ },
197
+ {
198
+ "kd_train_loss": 0.012925356253709969,
199
+ "ce_train_loss": 0.9662841369136163,
200
+ "epoch": 40
201
+ },
202
+ {
203
+ "kd_train_loss": 0.012114975042403494,
204
+ "ce_train_loss": 0.9656250225804471,
205
+ "epoch": 41
206
+ },
207
+ {
208
+ "kd_train_loss": 0.011418479984687967,
209
+ "ce_train_loss": 0.9645477702995308,
210
+ "epoch": 42
211
+ },
212
+ {
213
+ "kd_train_loss": 0.010792658223580083,
214
+ "ce_train_loss": 0.9636148070924003,
215
+ "epoch": 43
216
+ },
217
+ {
218
+ "kd_train_loss": 0.010167134099653873,
219
+ "ce_train_loss": 0.9630015806162222,
220
+ "epoch": 44
221
+ },
222
+ {
223
+ "kd_train_loss": 0.009592672717607113,
224
+ "ce_train_loss": 0.9621474482241313,
225
+ "epoch": 45
226
+ },
227
+ {
228
+ "kd_train_loss": 0.009105226525108591,
229
+ "ce_train_loss": 0.9620632947881944,
230
+ "epoch": 46
231
+ },
232
+ {
233
+ "kd_train_loss": 0.00871528322197975,
234
+ "ce_train_loss": 0.9614161174857091,
235
+ "epoch": 47
236
+ }
237
+ ]
validation_metrics.json ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "exact_match": 56.773888363292336,
4
+ "f1": 73.72417619315645,
5
+ "validation_loss": 1.6563380337264164,
6
+ "epoch": 1
7
+ },
8
+ {
9
+ "exact_match": 57.43614001892148,
10
+ "f1": 74.06148403773376,
11
+ "validation_loss": 1.64034905401339,
12
+ "epoch": 2
13
+ },
14
+ {
15
+ "exact_match": 57.89025543992432,
16
+ "f1": 74.36047563454983,
17
+ "validation_loss": 1.6418569500905922,
18
+ "epoch": 3
19
+ },
20
+ {
21
+ "exact_match": 58.24976348155156,
22
+ "f1": 74.70328465729796,
23
+ "validation_loss": 1.6277078729437058,
24
+ "epoch": 4
25
+ },
26
+ {
27
+ "exact_match": 58.36329233680227,
28
+ "f1": 74.72256680433863,
29
+ "validation_loss": 1.6119426237531456,
30
+ "epoch": 5
31
+ },
32
+ {
33
+ "exact_match": 58.19299905392621,
34
+ "f1": 74.66767848852678,
35
+ "validation_loss": 1.5854417412395936,
36
+ "epoch": 6
37
+ },
38
+ {
39
+ "exact_match": 58.42951750236519,
40
+ "f1": 75.10726299069853,
41
+ "validation_loss": 1.5882954085806766,
42
+ "epoch": 7
43
+ },
44
+ {
45
+ "exact_match": 58.54304635761589,
46
+ "f1": 75.12018789991633,
47
+ "validation_loss": 1.5932509220508209,
48
+ "epoch": 8
49
+ },
50
+ {
51
+ "exact_match": 58.94039735099338,
52
+ "f1": 75.72252412564588,
53
+ "validation_loss": 1.5573067957737359,
54
+ "epoch": 9
55
+ },
56
+ {
57
+ "exact_match": 58.751182592242195,
58
+ "f1": 75.57069782633266,
59
+ "validation_loss": 1.5627067006496063,
60
+ "epoch": 10
61
+ },
62
+ {
63
+ "exact_match": 58.71333964049196,
64
+ "f1": 75.66181015203476,
65
+ "validation_loss": 1.5502030389854706,
66
+ "epoch": 11
67
+ },
68
+ {
69
+ "exact_match": 58.68495742667928,
70
+ "f1": 75.41545514304845,
71
+ "validation_loss": 1.5601499206330403,
72
+ "epoch": 12
73
+ },
74
+ {
75
+ "exact_match": 58.893093661305585,
76
+ "f1": 75.61820191324081,
77
+ "validation_loss": 1.5579824165766498,
78
+ "epoch": 13
79
+ },
80
+ {
81
+ "exact_match": 59.15799432355724,
82
+ "f1": 75.80316995714823,
83
+ "validation_loss": 1.542337314371603,
84
+ "epoch": 14
85
+ },
86
+ {
87
+ "exact_match": 58.58088930936613,
88
+ "f1": 75.35689271402173,
89
+ "validation_loss": 1.563731160329049,
90
+ "epoch": 15
91
+ },
92
+ {
93
+ "exact_match": 59.271523178807946,
94
+ "f1": 76.03559936933563,
95
+ "validation_loss": 1.5445514630122357,
96
+ "epoch": 16
97
+ },
98
+ {
99
+ "exact_match": 58.893093661305585,
100
+ "f1": 75.87361664177146,
101
+ "validation_loss": 1.5556478916880596,
102
+ "epoch": 17
103
+ },
104
+ {
105
+ "exact_match": 58.9120151371807,
106
+ "f1": 75.86437025057967,
107
+ "validation_loss": 1.5426960688039482,
108
+ "epoch": 18
109
+ },
110
+ {
111
+ "exact_match": 58.94039735099338,
112
+ "f1": 75.78068356142592,
113
+ "validation_loss": 1.545481281646763,
114
+ "epoch": 19
115
+ },
116
+ {
117
+ "exact_match": 59.11069063386944,
118
+ "f1": 76.12330260330774,
119
+ "validation_loss": 1.5343132101627717,
120
+ "epoch": 20
121
+ },
122
+ {
123
+ "exact_match": 59.04446546830653,
124
+ "f1": 76.02047375062949,
125
+ "validation_loss": 1.534107970904155,
126
+ "epoch": 21
127
+ },
128
+ {
129
+ "exact_match": 59.40397350993378,
130
+ "f1": 76.30809534965822,
131
+ "validation_loss": 1.5312127582280033,
132
+ "epoch": 22
133
+ },
134
+ {
135
+ "exact_match": 59.120151371807,
136
+ "f1": 76.15914511214865,
137
+ "validation_loss": 1.5348517876791667,
138
+ "epoch": 23
139
+ },
140
+ {
141
+ "exact_match": 59.34720908230842,
142
+ "f1": 76.25837981885495,
143
+ "validation_loss": 1.5409924899598202,
144
+ "epoch": 24
145
+ },
146
+ {
147
+ "exact_match": 59.44181646168401,
148
+ "f1": 76.26897303750731,
149
+ "validation_loss": 1.5510856476892907,
150
+ "epoch": 25
151
+ },
152
+ {
153
+ "exact_match": 59.498580889309366,
154
+ "f1": 76.17147587308575,
155
+ "validation_loss": 1.5561939521008228,
156
+ "epoch": 26
157
+ },
158
+ {
159
+ "exact_match": 59.498580889309366,
160
+ "f1": 76.30106617121422,
161
+ "validation_loss": 1.542546825595649,
162
+ "epoch": 27
163
+ },
164
+ {
165
+ "exact_match": 59.5364238410596,
166
+ "f1": 76.38168088916433,
167
+ "validation_loss": 1.5348846079355263,
168
+ "epoch": 28
169
+ },
170
+ {
171
+ "exact_match": 59.44181646168401,
172
+ "f1": 76.41095935853004,
173
+ "validation_loss": 1.5319310460823128,
174
+ "epoch": 29
175
+ },
176
+ {
177
+ "exact_match": 59.366130558183535,
178
+ "f1": 76.28434164257216,
179
+ "validation_loss": 1.5311855693538505,
180
+ "epoch": 30
181
+ },
182
+ {
183
+ "exact_match": 59.280983916745505,
184
+ "f1": 76.23531674844706,
185
+ "validation_loss": 1.5304096960518734,
186
+ "epoch": 31
187
+ },
188
+ {
189
+ "exact_match": 59.47019867549669,
190
+ "f1": 76.37410440702645,
191
+ "validation_loss": 1.5263439420476017,
192
+ "epoch": 32
193
+ },
194
+ {
195
+ "exact_match": 59.21475875118259,
196
+ "f1": 76.21030265754071,
197
+ "validation_loss": 1.5248871047094643,
198
+ "epoch": 33
199
+ },
200
+ {
201
+ "exact_match": 59.413434247871336,
202
+ "f1": 76.3041731495609,
203
+ "validation_loss": 1.5274084300161845,
204
+ "epoch": 34
205
+ },
206
+ {
207
+ "exact_match": 59.583727530747396,
208
+ "f1": 76.42354907795938,
209
+ "validation_loss": 1.523247041077499,
210
+ "epoch": 35
211
+ },
212
+ {
213
+ "exact_match": 59.52696310312204,
214
+ "f1": 76.46389656736687,
215
+ "validation_loss": 1.5215930620948952,
216
+ "epoch": 36
217
+ },
218
+ {
219
+ "exact_match": 59.46073793755913,
220
+ "f1": 76.37110704650382,
221
+ "validation_loss": 1.5274385228214493,
222
+ "epoch": 37
223
+ },
224
+ {
225
+ "exact_match": 59.432355723746454,
226
+ "f1": 76.42993032161579,
227
+ "validation_loss": 1.526159930660064,
228
+ "epoch": 38
229
+ },
230
+ {
231
+ "exact_match": 59.593188268684955,
232
+ "f1": 76.52329953432769,
233
+ "validation_loss": 1.5264484878764095,
234
+ "epoch": 39
235
+ },
236
+ {
237
+ "exact_match": 59.3755912961211,
238
+ "f1": 76.35802800927557,
239
+ "validation_loss": 1.5288960034229668,
240
+ "epoch": 40
241
+ },
242
+ {
243
+ "exact_match": 59.366130558183535,
244
+ "f1": 76.28329017065013,
245
+ "validation_loss": 1.5268410180347511,
246
+ "epoch": 41
247
+ },
248
+ {
249
+ "exact_match": 59.517502365184484,
250
+ "f1": 76.35262610336038,
251
+ "validation_loss": 1.5267506884523185,
252
+ "epoch": 42
253
+ },
254
+ {
255
+ "exact_match": 59.47019867549669,
256
+ "f1": 76.43890493947012,
257
+ "validation_loss": 1.527349637394928,
258
+ "epoch": 43
259
+ },
260
+ {
261
+ "exact_match": 59.271523178807946,
262
+ "f1": 76.31567188213893,
263
+ "validation_loss": 1.5279370821024998,
264
+ "epoch": 44
265
+ },
266
+ {
267
+ "exact_match": 59.23368022705771,
268
+ "f1": 76.23882730901728,
269
+ "validation_loss": 1.530127781162779,
270
+ "epoch": 45
271
+ },
272
+ {
273
+ "exact_match": 59.14853358561968,
274
+ "f1": 76.12929812347261,
275
+ "validation_loss": 1.5308673516454467,
276
+ "epoch": 46
277
+ },
278
+ {
279
+ "exact_match": 59.11069063386944,
280
+ "f1": 76.08559158506698,
281
+ "validation_loss": 1.5331473354115543,
282
+ "epoch": 47
283
+ }
284
+ ]