add tokenizer
Browse files- vocab.json +1 -1
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"ữ": 0, "ầ": 1, "ệ": 2, "ẵ": 3, "ĩ": 4, "ọ": 5, "ỉ": 6, "m": 7, "o": 8, "ử": 9, "ồ": 10, "ẹ": 11, "ấ": 12, "y": 13, "ũ": 14, "ị": 15, "ẻ": 16, "s": 17, "ớ": 18, "ả": 19, "ố": 20, "ỏ": 21, "ô": 22, "ạ": 23, "đ": 24, "ù": 25, "b": 26, "ỵ": 27, "ẩ": 28, "l": 29, "t": 30, "j": 31, "ă": 32, "ỳ": 33, "ề": 34, "f": 35, "ẳ": 36, "ó": 37, "ừ": 38, "ở": 39, "h": 40, "k": 41, "e": 42, "c": 43, "ụ": 45, "v": 46, "ờ": 47, "ễ": 48, "ỡ": 49, "p": 50, "ơ": 51, "ộ": 52, "z": 53, "é": 54, "ứ": 55, "ẫ": 56, "ế": 57, "ú": 58, "ể": 59, "r": 60, "ý": 61, "g": 62, "õ": 63, "q": 64, "n": 65, "ủ": 66, "í": 67, "ặ": 68, "ã": 69, "u": 70, "è": 71, "ự": 72, "ì": 73, "a": 74, "w": 75, "d": 76, "ò": 77, "ỗ": 78, "ẽ": 79, "ỹ": 80, "i": 81, "ư": 82, "ỷ": 83, "á": 84, "â": 85, "à": 86, "x": 87, "ậ": 88, "ắ": 89, "ằ": 90, "ợ": 91, "ổ": 92, "ê": 93, "|": 44, "[UNK]": 94, "[PAD]": 95}
|