add tokenizer
Browse files- vocab.json +1 -1
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"ẻ": 0, "a": 1, "ự": 2, "ỵ": 3, "á": 4, "ằ": 5, "b": 6, "ố": 7, "đ": 8, "g": 9, "p": 10, "s": 11, "ỹ": 12, "ọ": 13, "ủ": 14, "ẫ": 15, "ẹ": 16, "ò": 17, "à": 18, "ớ": 19, "â": 20, "v": 21, "ă": 22, "ữ": 23, "o": 24, "j": 25, "ợ": 26, "ễ": 27, "ể": 28, "ĩ": 29, "t": 30, "ó": 31, "ù": 32, "ỡ": 33, "h": 34, "ơ": 35, "ồ": 36, "ứ": 37, "l": 38, "ẩ": 39, "ẽ": 40, "ị": 41, "n": 42, "q": 43, "ở": 44, "r": 45, "x": 46, "õ": 47, "ề": 48, "ô": 49, "ổ": 50, "ê": 51, "ầ": 52, "ã": 53, "i": 54, "ũ": 55, "ạ": 56, "ử": 57, "y": 58, "ấ": 59, "é": 60, "ẳ": 61, "ư": 62, "ộ": 63, "ỉ": 64, "m": 66, "ế": 67, "ụ": 68, "ắ": 69, "e": 70, "w": 71, "c": 72, "ì": 73, "ỷ": 74, "ờ": 75, "z": 76, "k": 77, "è": 78, "í": 79, "ỏ": 80, "ý": 81, "ú": 82, "ậ": 83, "ừ": 84, "u": 85, "ả": 86, "ỗ": 87, "d": 88, "f": 89, "ẵ": 90, "ệ": 91, "ỳ": 92, "ặ": 93, "|": 65, "[UNK]": 94, "[PAD]": 95}
|