add tokenizer
Browse files- vocab.json +1 -1
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"o": 0, "ơ": 1, "ế": 2, "ỵ": 3, "ũ": 4, "ọ": 6, "k": 7, "ẩ": 8, "f": 9, "t": 10, "ó": 11, "ợ": 12, "q": 13, "ữ": 14, "c": 15, "ỳ": 16, "w": 17, "r": 18, "ỏ": 19, "ú": 20, "é": 21, "õ": 22, "ẽ": 23, "ỉ": 24, "ù": 25, "ộ": 26, "ừ": 27, "u": 28, "ã": 29, "ở": 30, "m": 31, "ă": 32, "á": 33, "ỷ": 34, "ệ": 35, "ồ": 36, "d": 37, "ẻ": 38, "s": 39, "ả": 40, "ề": 41, "ờ": 42, "ố": 43, "ẵ": 44, "ị": 45, "ì": 46, "ư": 47, "ằ": 48, "ẳ": 49, "l": 50, "j": 51, "ứ": 52, "p": 53, "ẹ": 54, "ê": 55, "e": 56, "z": 57, "ổ": 58, "đ": 59, "ặ": 60, "y": 61, "è": 62, "ĩ": 63, "i": 64, "ỡ": 65, "â": 66, "ý": 67, "ò": 68, "ầ": 69, "g": 70, "à": 71, "h": 72, "x": 73, "ễ": 74, "í": 75, "ỹ": 76, "ẫ": 77, "ô": 78, "ự": 79, "ắ": 80, "ậ": 81, "ụ": 82, "b": 83, "ử": 84, "ủ": 85, "ấ": 86, "ể": 87, "ớ": 88, "ỗ": 89, "ạ": 90, "v": 91, "n": 92, "a": 93, "|": 5, "[UNK]": 94, "[PAD]": 95}
|