bishaltwr committed on
Commit
1b393ac
·
verified ·
1 Parent(s): 90d9cdb

Update vocab.json

Browse files
Files changed (1) hide show
  1. vocab.json +252 -97
vocab.json CHANGED
@@ -1,97 +1,252 @@
1
- {
2
- "npi": {
3
- "(": 1,
4
- ")": 2,
5
- "/": 3,
6
- "[PAD]": 92,
7
- "[UNK]": 91,
8
- "a": 4,
9
- "b": 5,
10
- "c": 6,
11
- "e": 7,
12
- "f": 8,
13
- "k": 9,
14
- "o": 10,
15
- "|": 0,
16
- "ँ": 11,
17
- "ं": 12,
18
- "ः": 13,
19
- "अ": 14,
20
- "आ": 15,
21
- "इ": 16,
22
- "ई": 17,
23
- "उ": 18,
24
- "ऊ": 19,
25
- "ऋ": 20,
26
- "ए": 21,
27
- "ऐ": 22,
28
- "ओ": 23,
29
- "औ": 24,
30
- "क": 25,
31
- "ख": 26,
32
- "ग": 27,
33
- "घ": 28,
34
- "ङ": 29,
35
- "च": 30,
36
- "छ": 31,
37
- "ज": 32,
38
- "झ": 33,
39
- "ञ": 34,
40
- "ट": 35,
41
- "ठ": 36,
42
- "ड": 37,
43
- "ढ": 38,
44
- "ण": 39,
45
- "त": 40,
46
- "थ": 41,
47
- "द": 42,
48
- "ध": 43,
49
- "न": 44,
50
- "प": 45,
51
- "फ": 46,
52
- "ब": 47,
53
- "भ": 48,
54
- "म": 49,
55
- "य": 50,
56
- "र": 51,
57
- "ऱ": 52,
58
- "ल": 53,
59
- "व": 54,
60
- "श": 55,
61
- "ष": 56,
62
- "स": 57,
63
- "ह": 58,
64
- "़": 59,
65
- "ा": 60,
66
- "ि": 61,
67
- "ी": 62,
68
- "ु": 63,
69
- "ू": 64,
70
- "ृ": 65,
71
- "े": 66,
72
- "ै": 67,
73
- "ो": 68,
74
- "ौ": 69,
75
- "्": 70,
76
- "ॐ": 71,
77
- "॑": 72,
78
- "ॠ": 73,
79
- "०": 74,
80
- "१": 75,
81
- "२": 76,
82
- "३": 77,
83
- "४": 78,
84
- "५": 79,
85
- "६": 80,
86
- "७": 81,
87
- "८": 82,
88
- "९": 83,
89
- "॰": 84,
90
- "‌": 85,
91
- "‍": 86,
92
- "‎": 87,
93
- "–": 88,
94
- "—": 89,
95
- "’": 90
96
- }
97
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"eng": {
2
+ "!": 75,
3
+ "\"": 62,
4
+ "$": 48,
5
+ "%": 47,
6
+ "&": 57,
7
+ "'": 31,
8
+ "+": 56,
9
+ ",": 44,
10
+ "-": 32,
11
+ ".": 37,
12
+ "/": 45,
13
+ "0": 27,
14
+ "1": 30,
15
+ "2": 35,
16
+ "3": 41,
17
+ "4": 39,
18
+ "5": 38,
19
+ "6": 40,
20
+ "7": 43,
21
+ "8": 42,
22
+ "9": 36,
23
+ ":": 46,
24
+ ";": 52,
25
+ "</s>": 2,
26
+ "<pad>": 0,
27
+ "<s>": 1,
28
+ "<unk>": 3,
29
+ "[": 59,
30
+ "]": 60,
31
+ "`": 92,
32
+ "a": 7,
33
+ "b": 24,
34
+ "c": 16,
35
+ "d": 15,
36
+ "e": 5,
37
+ "f": 19,
38
+ "g": 21,
39
+ "h": 13,
40
+ "i": 9,
41
+ "j": 29,
42
+ "k": 26,
43
+ "l": 14,
44
+ "m": 18,
45
+ "n": 10,
46
+ "o": 8,
47
+ "p": 20,
48
+ "q": 34,
49
+ "r": 12,
50
+ "s": 11,
51
+ "t": 6,
52
+ "u": 17,
53
+ "v": 25,
54
+ "w": 23,
55
+ "x": 28,
56
+ "y": 22,
57
+ "z": 33,
58
+ "|": 4,
59
+ "\u00a3": 68,
60
+ "\u00a5": 53,
61
+ "\u00a7": 139,
62
+ "\u00b0": 58,
63
+ "\u00b2": 76,
64
+ "\u00df": 82,
65
+ "\u00e0": 84,
66
+ "\u00e1": 49,
67
+ "\u00e2": 81,
68
+ "\u00e3": 66,
69
+ "\u00e4": 78,
70
+ "\u00e5": 123,
71
+ "\u00e6": 110,
72
+ "\u00e7": 64,
73
+ "\u00e8": 79,
74
+ "\u00e9": 55,
75
+ "\u00ea": 88,
76
+ "\u00eb": 87,
77
+ "\u00ed": 50,
78
+ "\u00ee": 97,
79
+ "\u00ef": 85,
80
+ "\u00f0": 98,
81
+ "\u00f1": 83,
82
+ "\u00f2": 105,
83
+ "\u00f3": 67,
84
+ "\u00f4": 86,
85
+ "\u00f5": 69,
86
+ "\u00f6": 74,
87
+ "\u00f8": 91,
88
+ "\u00fa": 70,
89
+ "\u00fb": 99,
90
+ "\u00fc": 54,
91
+ "\u00fd": 112,
92
+ "\u00fe": 132,
93
+ "\u0101": 100,
94
+ "\u0103": 102,
95
+ "\u0107": 94,
96
+ "\u010d": 89,
97
+ "\u0113": 143,
98
+ "\u0119": 119,
99
+ "\u011b": 140,
100
+ "\u011f": 153,
101
+ "\u012b": 107,
102
+ "\u0131": 103,
103
+ "\u0142": 73,
104
+ "\u0144": 118,
105
+ "\u0148": 137,
106
+ "\u014d": 80,
107
+ "\u0151": 120,
108
+ "\u0153": 96,
109
+ "\u0159": 108,
110
+ "\u015f": 116,
111
+ "\u0161": 90,
112
+ "\u016b": 101,
113
+ "\u017e": 95,
114
+ "\u0219": 104,
115
+ "\u0259": 138,
116
+ "\u02bb": 115,
117
+ "\u0301": 93,
118
+ "\u0307": 77,
119
+ "\u03b1": 106,
120
+ "\u03ba": 109,
121
+ "\u03c0": 117,
122
+ "\u03c7": 141,
123
+ "\u0430": 144,
124
+ "\u0432": 124,
125
+ "\u0435": 125,
126
+ "\u0437": 130,
127
+ "\u0438": 113,
128
+ "\u0439": 127,
129
+ "\u043a": 114,
130
+ "\u043b": 126,
131
+ "\u043d": 128,
132
+ "\u044c": 131,
133
+ "\u044f": 129,
134
+ "\u05e0": 147,
135
+ "\u05e2": 148,
136
+ "\u1e43": 121,
137
+ "\u1ea1": 111,
138
+ "\u1ea3": 145,
139
+ "\u1ecb": 146,
140
+ "\u1ee5": 122,
141
+ "\u200b": 71,
142
+ "\u2013": 61,
143
+ "\u2014": 51,
144
+ "\u2018": 72,
145
+ "\u2019": 65,
146
+ "\u201d": 63,
147
+ "\u2261": 142,
148
+ "\u4eac": 149,
149
+ "\u5148": 135,
150
+ "\u5927": 151,
151
+ "\u5c1a": 134,
152
+ "\u65f6": 133,
153
+ "\u751f": 136,
154
+ "\u90fd": 150,
155
+ "\u962a": 152
156
+ },
157
+ "npi": {
158
+ "(": 1,
159
+ ")": 2,
160
+ "/": 3,
161
+ "[PAD]": 92,
162
+ "[UNK]": 91,
163
+ "a": 4,
164
+ "b": 5,
165
+ "c": 6,
166
+ "e": 7,
167
+ "f": 8,
168
+ "k": 9,
169
+ "o": 10,
170
+ "|": 0,
171
+ "ँ": 11,
172
+ "ं": 12,
173
+ "ः": 13,
174
+ "अ": 14,
175
+ "आ": 15,
176
+ "इ": 16,
177
+ "ई": 17,
178
+ "उ": 18,
179
+ "ऊ": 19,
180
+ "ऋ": 20,
181
+ "ए": 21,
182
+ "ऐ": 22,
183
+ "ओ": 23,
184
+ "औ": 24,
185
+ "क": 25,
186
+ "ख": 26,
187
+ "ग": 27,
188
+ "घ": 28,
189
+ "ङ": 29,
190
+ "च": 30,
191
+ "छ": 31,
192
+ "ज": 32,
193
+ "झ": 33,
194
+ "ञ": 34,
195
+ "ट": 35,
196
+ "ठ": 36,
197
+ "ड": 37,
198
+ "ढ": 38,
199
+ "ण": 39,
200
+ "त": 40,
201
+ "थ": 41,
202
+ "द": 42,
203
+ "ध": 43,
204
+ "न": 44,
205
+ "प": 45,
206
+ "फ": 46,
207
+ "ब": 47,
208
+ "भ": 48,
209
+ "म": 49,
210
+ "य": 50,
211
+ "र": 51,
212
+ "ऱ": 52,
213
+ "ल": 53,
214
+ "व": 54,
215
+ "श": 55,
216
+ "ष": 56,
217
+ "स": 57,
218
+ "ह": 58,
219
+ "़": 59,
220
+ "ा": 60,
221
+ "ि": 61,
222
+ "ी": 62,
223
+ "ु": 63,
224
+ "ू": 64,
225
+ "ृ": 65,
226
+ "े": 66,
227
+ "ै": 67,
228
+ "ो": 68,
229
+ "ौ": 69,
230
+ "्": 70,
231
+ "ॐ": 71,
232
+ "॑": 72,
233
+ "ॠ": 73,
234
+ "०": 74,
235
+ "१": 75,
236
+ "२": 76,
237
+ "३": 77,
238
+ "४": 78,
239
+ "५": 79,
240
+ "६": 80,
241
+ "७": 81,
242
+ "८": 82,
243
+ "९": 83,
244
+ "॰": 84,
245
+ "‌": 85,
246
+ "‍": 86,
247
+ "‎": 87,
248
+ "–": 88,
249
+ "—": 89,
250
+ "’": 90
251
+ }
252
+ }