ShukantP committed on
Commit
b8e0682
·
verified ·
1 Parent(s): e9feeb5

Cronformer's output tokenizer and model config

Browse files
Files changed (2) hide show
  1. config.json +25 -0
  2. tokenizer.json +262 -0
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "CronformerModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "cron_vocab_size": 78,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 3072,
12
+ "lang_tokenizer": "distilbert/distilbert-base-uncased",
13
+ "lang_vocab_size": 30522,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "cronformer",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_dec_layers": 4,
19
+ "num_hidden_enc_layers": 6,
20
+ "num_hidden_layers": 10,
21
+ "pad_token_id": 0,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.42.4",
24
+ "type_vocab_size": 2
25
+ }
tokenizer.json ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 65,
8
+ "special": true,
9
+ "content": "<s>",
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "single_word": false
14
+ },
15
+ {
16
+ "id": 66,
17
+ "special": true,
18
+ "content": "</s>",
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "id": 67,
26
+ "special": true,
27
+ "content": "<pad>",
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "single_word": false
32
+ },
33
+ {
34
+ "id": 68,
35
+ "special": true,
36
+ "content": "<minute>",
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "single_word": false
41
+ },
42
+ {
43
+ "id": 69,
44
+ "special": true,
45
+ "content": "</minute>",
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "single_word": false
50
+ },
51
+ {
52
+ "id": 70,
53
+ "special": true,
54
+ "content": "<hour>",
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "single_word": false
59
+ },
60
+ {
61
+ "id": 71,
62
+ "special": true,
63
+ "content": "</hour>",
64
+ "lstrip": false,
65
+ "rstrip": false,
66
+ "normalized": false,
67
+ "single_word": false
68
+ },
69
+ {
70
+ "id": 72,
71
+ "special": true,
72
+ "content": "<date>",
73
+ "lstrip": false,
74
+ "rstrip": false,
75
+ "normalized": false,
76
+ "single_word": false
77
+ },
78
+ {
79
+ "id": 73,
80
+ "special": true,
81
+ "content": "</date>",
82
+ "lstrip": false,
83
+ "rstrip": false,
84
+ "normalized": false,
85
+ "single_word": false
86
+ },
87
+ {
88
+ "id": 74,
89
+ "special": true,
90
+ "content": "<month>",
91
+ "lstrip": false,
92
+ "rstrip": false,
93
+ "normalized": false,
94
+ "single_word": false
95
+ },
96
+ {
97
+ "id": 75,
98
+ "special": true,
99
+ "content": "</month>",
100
+ "lstrip": false,
101
+ "rstrip": false,
102
+ "normalized": false,
103
+ "single_word": false
104
+ },
105
+ {
106
+ "id": 76,
107
+ "special": true,
108
+ "content": "<day_of_week>",
109
+ "lstrip": false,
110
+ "rstrip": false,
111
+ "normalized": false,
112
+ "single_word": false
113
+ },
114
+ {
115
+ "id": 77,
116
+ "special": true,
117
+ "content": "</day_of_week>",
118
+ "lstrip": false,
119
+ "rstrip": false,
120
+ "normalized": false,
121
+ "single_word": false
122
+ }
123
+ ],
124
+ "normalizer": null,
125
+ "pre_tokenizer": null,
126
+ "post_processor": null,
127
+ "decoder": {
128
+ "type": "ByteLevel",
129
+ "add_prefix_space": false,
130
+ "trim_offsets": false,
131
+ "use_regex": false
132
+ },
133
+ "model": {
134
+ "type": "BPE",
135
+ "dropout": null,
136
+ "unk_token": null,
137
+ "continuing_subword_prefix": null,
138
+ "end_of_word_suffix": null,
139
+ "fuse_unk": false,
140
+ "byte_fallback": false,
141
+ "ignore_merges": true,
142
+ "vocab": {
143
+ "0": 0,
144
+ "1": 1,
145
+ "2": 2,
146
+ "3": 3,
147
+ "4": 4,
148
+ "5": 5,
149
+ "6": 6,
150
+ "7": 7,
151
+ "8": 8,
152
+ "9": 9,
153
+ "10": 10,
154
+ "11": 11,
155
+ "12": 12,
156
+ "13": 13,
157
+ "14": 14,
158
+ "15": 15,
159
+ "16": 16,
160
+ "17": 17,
161
+ "18": 18,
162
+ "19": 19,
163
+ "20": 20,
164
+ "21": 21,
165
+ "22": 22,
166
+ "23": 23,
167
+ "24": 24,
168
+ "25": 25,
169
+ "26": 26,
170
+ "27": 27,
171
+ "28": 28,
172
+ "29": 29,
173
+ "30": 30,
174
+ "31": 31,
175
+ "32": 32,
176
+ "33": 33,
177
+ "34": 34,
178
+ "35": 35,
179
+ "36": 36,
180
+ "37": 37,
181
+ "38": 38,
182
+ "39": 39,
183
+ "40": 40,
184
+ "41": 41,
185
+ "42": 42,
186
+ "43": 43,
187
+ "44": 44,
188
+ "45": 45,
189
+ "46": 46,
190
+ "47": 47,
191
+ "48": 48,
192
+ "49": 49,
193
+ "50": 50,
194
+ "51": 51,
195
+ "52": 52,
196
+ "53": 53,
197
+ "54": 54,
198
+ "55": 55,
199
+ "56": 56,
200
+ "57": 57,
201
+ "58": 58,
202
+ "59": 59,
203
+ "-": 60,
204
+ ",": 61,
205
+ "/": 62,
206
+ "*": 63,
207
+ "%": 64
208
+ },
209
+ "merges": [
210
+ "5 9",
211
+ "5 8",
212
+ "5 7",
213
+ "5 6",
214
+ "5 5",
215
+ "5 4",
216
+ "5 3",
217
+ "5 2",
218
+ "5 1",
219
+ "5 0",
220
+ "4 9",
221
+ "4 8",
222
+ "4 7",
223
+ "4 6",
224
+ "4 5",
225
+ "4 4",
226
+ "4 3",
227
+ "4 2",
228
+ "4 1",
229
+ "4 0",
230
+ "3 9",
231
+ "3 8",
232
+ "3 7",
233
+ "3 6",
234
+ "3 5",
235
+ "3 4",
236
+ "3 3",
237
+ "3 2",
238
+ "3 1",
239
+ "3 0",
240
+ "2 9",
241
+ "2 8",
242
+ "2 7",
243
+ "2 6",
244
+ "2 5",
245
+ "2 4",
246
+ "2 3",
247
+ "2 2",
248
+ "2 1",
249
+ "2 0",
250
+ "1 9",
251
+ "1 8",
252
+ "1 7",
253
+ "1 6",
254
+ "1 5",
255
+ "1 4",
256
+ "1 3",
257
+ "1 2",
258
+ "1 1",
259
+ "1 0"
260
+ ]
261
+ }
262
+ }