add tokenizer
Browse files- vocab.json +1 -1
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"ư": 0, "ý": 1, "ặ": 2, "ỡ": 3, "ê": 4, "ó": 5, "ụ": 6, "í": 7, "ỳ": 8, "c": 9, "t": 10, "ỗ": 11, "n": 12, "ố": 13, "é": 14, "u": 15, "ă": 16, "h": 17, "e": 18, "ỷ": 19, "ổ": 20, "ù": 21, "a": 22, "è": 23, "ị": 24, "ỵ": 25, "ấ": 26, "ễ": 27, "y": 28, "ẳ": 29, "l": 30, "ể": 31, "ũ": 32, "i": 33, "x": 34, "ẹ": 35, "á": 36, "ẽ": 37, "r": 38, "ẵ": 39, "ỉ": 40, "b": 41, "ẩ": 42, "ạ": 43, "ỹ": 44, "ĩ": 46, "ẫ": 47, "ọ": 48, "ử": 49, "s": 50, "ỏ": 51, "g": 52, "ơ": 53, "ứ": 54, "ú": 55, "ậ": 56, "v": 57, "f": 58, "ở": 59, "o": 60, "ì": 61, "ế": 62, "ằ": 63, "đ": 64, "ệ": 65, "ề": 66, "ờ": 67, "ộ": 68, "ừ": 69, "ò": 70, "ã": 71, "ầ": 72, "õ": 73, "ủ": 74, "z": 75, "ữ": 76, "w": 77, "ự": 78, "ồ": 79, "ẻ": 80, "k": 81, "ớ": 82, "â": 83, "m": 84, "j": 85, "d": 86, "à": 87, "ắ": 88, "ô": 89, "q": 90, "p": 91, "ợ": 92, "ả": 93, "|": 45, "[UNK]": 94, "[PAD]": 95}
|