bishaltwr committed on
Commit
06f6cf9
·
verified ·
1 Parent(s): c021cb9

Update vocab.json

Browse files
Files changed (1) hide show
  1. vocab.json +253 -253
vocab.json CHANGED
@@ -1,253 +1,253 @@
1
- {
2
- "eng": {
3
- "!": 75,
4
- "\"": 62,
5
- "$": 48,
6
- "%": 47,
7
- "&": 57,
8
- "'": 31,
9
- "+": 56,
10
- ",": 44,
11
- "-": 32,
12
- ".": 37,
13
- "/": 45,
14
- "0": 27,
15
- "1": 30,
16
- "2": 35,
17
- "3": 41,
18
- "4": 39,
19
- "5": 38,
20
- "6": 40,
21
- "7": 43,
22
- "8": 42,
23
- "9": 36,
24
- ":": 46,
25
- ";": 52,
26
- "</s>": 2,
27
- "<pad>": 0,
28
- "<s>": 1,
29
- "<unk>": 3,
30
- "[": 59,
31
- "]": 60,
32
- "`": 92,
33
- "a": 7,
34
- "b": 24,
35
- "c": 16,
36
- "d": 15,
37
- "e": 5,
38
- "f": 19,
39
- "g": 21,
40
- "h": 13,
41
- "i": 9,
42
- "j": 29,
43
- "k": 26,
44
- "l": 14,
45
- "m": 18,
46
- "n": 10,
47
- "o": 8,
48
- "p": 20,
49
- "q": 34,
50
- "r": 12,
51
- "s": 11,
52
- "t": 6,
53
- "u": 17,
54
- "v": 25,
55
- "w": 23,
56
- "x": 28,
57
- "y": 22,
58
- "z": 33,
59
- "|": 4,
60
- "£": 68,
61
- "¥": 53,
62
- "§": 139,
63
- "°": 58,
64
- "²": 76,
65
- "ß": 82,
66
- "à": 84,
67
- "á": 49,
68
- "â": 81,
69
- "ã": 66,
70
- "ä": 78,
71
- "å": 123,
72
- "æ": 110,
73
- "ç": 64,
74
- "è": 79,
75
- "é": 55,
76
- "ê": 88,
77
- "ë": 87,
78
- "í": 50,
79
- "î": 97,
80
- "ï": 85,
81
- "ð": 98,
82
- "ñ": 83,
83
- "ò": 105,
84
- "ó": 67,
85
- "ô": 86,
86
- "õ": 69,
87
- "ö": 74,
88
- "ø": 91,
89
- "ú": 70,
90
- "û": 99,
91
- "ü": 54,
92
- "ý": 112,
93
- "þ": 132,
94
- "ā": 100,
95
- "ă": 102,
96
- "ć": 94,
97
- "č": 89,
98
- "ē": 143,
99
- "ę": 119,
100
- "ě": 140,
101
- "ğ": 153,
102
- "ī": 107,
103
- "ı": 103,
104
- "ł": 73,
105
- "ń": 118,
106
- "ň": 137,
107
- "ō": 80,
108
- "ő": 120,
109
- "œ": 96,
110
- "ř": 108,
111
- "ş": 116,
112
- "š": 90,
113
- "ū": 101,
114
- "ž": 95,
115
- "ș": 104,
116
- "ə": 138,
117
- "ʻ": 115,
118
- "́": 93,
119
- "̇": 77,
120
- "α": 106,
121
- "κ": 109,
122
- "π": 117,
123
- "χ": 141,
124
- "а": 144,
125
- "в": 124,
126
- "е": 125,
127
- "з": 130,
128
- "и": 113,
129
- "й": 127,
130
- "к": 114,
131
- "л": 126,
132
- "н": 128,
133
- "ь": 131,
134
- "я": 129,
135
- "נ": 147,
136
- "ע": 148,
137
- "ṃ": 121,
138
- "ạ": 111,
139
- "ả": 145,
140
- "ị": 146,
141
- "ụ": 122,
142
- "​": 71,
143
- "–": 61,
144
- "—": 51,
145
- "‘": 72,
146
- "’": 65,
147
- "”": 63,
148
- "≡": 142,
149
- "京": 149,
150
- "先": 135,
151
- "大": 151,
152
- "尚": 134,
153
- "时": 133,
154
- "生": 136,
155
- "都": 150,
156
- "阪": 152
157
- },
158
- "npi": {
159
- "(": 1,
160
- ")": 2,
161
- "/": 3,
162
- "[PAD]": 92,
163
- "[UNK]": 91,
164
- "a": 4,
165
- "b": 5,
166
- "c": 6,
167
- "e": 7,
168
- "f": 8,
169
- "k": 9,
170
- "o": 10,
171
- "|": 0,
172
- "ँ": 11,
173
- "ं": 12,
174
- "ः": 13,
175
- "अ": 14,
176
- "आ": 15,
177
- "इ": 16,
178
- "ई": 17,
179
- "उ": 18,
180
- "ऊ": 19,
181
- "ऋ": 20,
182
- "ए": 21,
183
- "ऐ": 22,
184
- "ओ": 23,
185
- "औ": 24,
186
- "क": 25,
187
- "ख": 26,
188
- "ग": 27,
189
- "घ": 28,
190
- "ङ": 29,
191
- "च": 30,
192
- "छ": 31,
193
- "ज": 32,
194
- "झ": 33,
195
- "ञ": 34,
196
- "ट": 35,
197
- "ठ": 36,
198
- "ड": 37,
199
- "ढ": 38,
200
- "ण": 39,
201
- "त": 40,
202
- "थ": 41,
203
- "द": 42,
204
- "ध": 43,
205
- "न": 44,
206
- "प": 45,
207
- "फ": 46,
208
- "ब": 47,
209
- "भ": 48,
210
- "म": 49,
211
- "य": 50,
212
- "र": 51,
213
- "ऱ": 52,
214
- "ल": 53,
215
- "व": 54,
216
- "श": 55,
217
- "ष": 56,
218
- "स": 57,
219
- "ह": 58,
220
- "़": 59,
221
- "ा": 60,
222
- "ि": 61,
223
- "ी": 62,
224
- "ु": 63,
225
- "ू": 64,
226
- "ृ": 65,
227
- "े": 66,
228
- "ै": 67,
229
- "ो": 68,
230
- "ौ": 69,
231
- "्": 70,
232
- "ॐ": 71,
233
- "॑": 72,
234
- "ॠ": 73,
235
- "०": 74,
236
- "१": 75,
237
- "२": 76,
238
- "३": 77,
239
- "४": 78,
240
- "५": 79,
241
- "६": 80,
242
- "७": 81,
243
- "८": 82,
244
- "९": 83,
245
- "॰": 84,
246
- "‌": 85,
247
- "‍": 86,
248
- "‎": 87,
249
- "–": 88,
250
- "—": 89,
251
- "’": 90
252
- }
253
- }
 
1
+ {
2
+ "eng": {
3
+ "!": 75,
4
+ "\"": 62,
5
+ "$": 48,
6
+ "%": 47,
7
+ "&": 57,
8
+ "'": 31,
9
+ "+": 56,
10
+ ",": 44,
11
+ "-": 32,
12
+ ".": 37,
13
+ "/": 45,
14
+ "0": 27,
15
+ "1": 30,
16
+ "2": 35,
17
+ "3": 41,
18
+ "4": 39,
19
+ "5": 38,
20
+ "6": 40,
21
+ "7": 43,
22
+ "8": 42,
23
+ "9": 36,
24
+ ":": 46,
25
+ ";": 52,
26
+ "</s>": 2,
27
+ "<pad>": 0,
28
+ "<s>": 1,
29
+ "<unk>": 3,
30
+ "[": 59,
31
+ "]": 60,
32
+ "`": 92,
33
+ "a": 7,
34
+ "b": 24,
35
+ "c": 16,
36
+ "d": 15,
37
+ "e": 5,
38
+ "f": 19,
39
+ "g": 21,
40
+ "h": 13,
41
+ "i": 9,
42
+ "j": 29,
43
+ "k": 26,
44
+ "l": 14,
45
+ "m": 18,
46
+ "n": 10,
47
+ "o": 8,
48
+ "p": 20,
49
+ "q": 34,
50
+ "r": 12,
51
+ "s": 11,
52
+ "t": 6,
53
+ "u": 17,
54
+ "v": 25,
55
+ "w": 23,
56
+ "x": 28,
57
+ "y": 22,
58
+ "z": 33,
59
+ "|": 4,
60
+ "£": 68,
61
+ "¥": 53,
62
+ "§": 139,
63
+ "°": 58,
64
+ "²": 76,
65
+ "ß": 82,
66
+ "à": 84,
67
+ "á": 49,
68
+ "â": 81,
69
+ "ã": 66,
70
+ "ä": 78,
71
+ "å": 123,
72
+ "æ": 110,
73
+ "ç": 64,
74
+ "è": 79,
75
+ "é": 55,
76
+ "ê": 88,
77
+ "ë": 87,
78
+ "í": 50,
79
+ "î": 97,
80
+ "ï": 85,
81
+ "ð": 98,
82
+ "ñ": 83,
83
+ "ò": 105,
84
+ "ó": 67,
85
+ "ô": 86,
86
+ "õ": 69,
87
+ "ö": 74,
88
+ "ø": 91,
89
+ "ú": 70,
90
+ "û": 99,
91
+ "ü": 54,
92
+ "ý": 112,
93
+ "þ": 132,
94
+ "ā": 100,
95
+ "ă": 102,
96
+ "ć": 94,
97
+ "č": 89,
98
+ "ē": 143,
99
+ "ę": 119,
100
+ "ě": 140,
101
+ "ğ": 153,
102
+ "ī": 107,
103
+ "ı": 103,
104
+ "ł": 73,
105
+ "ń": 118,
106
+ "ň": 137,
107
+ "ō": 80,
108
+ "ő": 120,
109
+ "œ": 96,
110
+ "ř": 108,
111
+ "ş": 116,
112
+ "š": 90,
113
+ "ū": 101,
114
+ "ž": 95,
115
+ "ș": 104,
116
+ "ə": 138,
117
+ "ʻ": 115,
118
+ "́": 93,
119
+ "̇": 77,
120
+ "α": 106,
121
+ "κ": 109,
122
+ "π": 117,
123
+ "χ": 141,
124
+ "а": 144,
125
+ "в": 124,
126
+ "е": 125,
127
+ "з": 130,
128
+ "и": 113,
129
+ "й": 127,
130
+ "к": 114,
131
+ "л": 126,
132
+ "н": 128,
133
+ "ь": 131,
134
+ "я": 129,
135
+ "נ": 147,
136
+ "ע": 148,
137
+ "ṃ": 121,
138
+ "ạ": 111,
139
+ "ả": 145,
140
+ "ị": 146,
141
+ "ụ": 122,
142
+ "​": 71,
143
+ "–": 61,
144
+ "—": 51,
145
+ "‘": 72,
146
+ "’": 65,
147
+ "”": 63,
148
+ "≡": 142,
149
+ "京": 149,
150
+ "先": 135,
151
+ "大": 151,
152
+ "尚": 134,
153
+ "时": 133,
154
+ "生": 136,
155
+ "都": 150,
156
+ "阪": 152
157
+ },
158
+ "npi": {
159
+ "(": 1,
160
+ ")": 2,
161
+ "/": 3,
162
+ "[pad]": 92,
163
+ "[unk]": 91,
164
+ "a": 4,
165
+ "b": 5,
166
+ "c": 6,
167
+ "e": 7,
168
+ "f": 8,
169
+ "k": 9,
170
+ "o": 10,
171
+ "|": 0,
172
+ "ँ": 11,
173
+ "ं": 12,
174
+ "ः": 13,
175
+ "अ": 14,
176
+ "आ": 15,
177
+ "इ": 16,
178
+ "ई": 17,
179
+ "उ": 18,
180
+ "ऊ": 19,
181
+ "ऋ": 20,
182
+ "ए": 21,
183
+ "ऐ": 22,
184
+ "ओ": 23,
185
+ "औ": 24,
186
+ "क": 25,
187
+ "ख": 26,
188
+ "ग": 27,
189
+ "घ": 28,
190
+ "ङ": 29,
191
+ "च": 30,
192
+ "छ": 31,
193
+ "ज": 32,
194
+ "झ": 33,
195
+ "ञ": 34,
196
+ "ट": 35,
197
+ "ठ": 36,
198
+ "ड": 37,
199
+ "ढ": 38,
200
+ "ण": 39,
201
+ "त": 40,
202
+ "थ": 41,
203
+ "द": 42,
204
+ "ध": 43,
205
+ "न": 44,
206
+ "प": 45,
207
+ "फ": 46,
208
+ "ब": 47,
209
+ "भ": 48,
210
+ "म": 49,
211
+ "य": 50,
212
+ "र": 51,
213
+ "ऱ": 52,
214
+ "ल": 53,
215
+ "व": 54,
216
+ "श": 55,
217
+ "ष": 56,
218
+ "स": 57,
219
+ "ह": 58,
220
+ "़": 59,
221
+ "ा": 60,
222
+ "ि": 61,
223
+ "ी": 62,
224
+ "ु": 63,
225
+ "ू": 64,
226
+ "ृ": 65,
227
+ "े": 66,
228
+ "ै": 67,
229
+ "ो": 68,
230
+ "ौ": 69,
231
+ "्": 70,
232
+ "ॐ": 71,
233
+ "॑": 72,
234
+ "ॠ": 73,
235
+ "०": 74,
236
+ "१": 75,
237
+ "२": 76,
238
+ "३": 77,
239
+ "४": 78,
240
+ "५": 79,
241
+ "६": 80,
242
+ "७": 81,
243
+ "८": 82,
244
+ "९": 83,
245
+ "॰": 84,
246
+ "‌": 85,
247
+ "‍": 86,
248
+ "‎": 87,
249
+ "–": 88,
250
+ "—": 89,
251
+ "’": 90
252
+ }
253
+ }