phongdtd committed on
Commit
ad14892
·
1 Parent(s): 0ef9432

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +1 -1
vocab.json CHANGED
@@ -1 +1 @@
1
- {"": 0, "": 1, "n": 2, "": 3, "": 4, "õ": 5, "ắ": 6, "o": 7, "ã": 8, "q": 9, "": 10, "": 11, "l": 12, "m": 13, "f": 14, "ơ": 15, "ê": 16, "ó": 17, "": 18, "x": 19, "": 20, "ò": 22, "": 23, "k": 24, "ĩ": 25, "": 26, "y": 27, "â": 28, "é": 29, "": 30, "": 31, "è": 32, "à": 33, "ư": 34, "ý": 35, "j": 36, "á": 37, "e": 38, "u": 39, "i": 40, "b": 41, "d": 42, "": 43, "": 44, "": 45, "": 46, "a": 47, "ô": 48, "": 49, "": 50, "": 51, "": 52, "": 53, "": 54, "ũ": 55, "ế": 56, "t": 57, "c": 58, "": 59, "": 60, "ì": 61, "": 62, "": 63, "g": 64, "ù": 65, "ú": 66, "r": 67, "w": 68, "v": 69, "": 70, "": 71, "h": 72, "í": 73, "": 74, "": 75, "s": 76, "đ": 77, "": 78, "ự": 79, "p": 80, "": 81, "": 82, "": 83, "": 84, "": 85, "": 86, "": 87, "": 88, "ă": 89, "z": 90, "": 91, "": 92, "": 93, "|": 21, "[UNK]": 94, "[PAD]": 95}
 
1
+ {"o": 0, "ơ": 1, "ế": 2, "": 3, "ũ": 4, "": 6, "k": 7, "": 8, "f": 9, "t": 10, "ó": 11, "": 12, "q": 13, "": 14, "c": 15, "": 16, "w": 17, "r": 18, "": 19, "ú": 20, "é": 21, "õ": 22, "": 23, "": 24, "ù": 25, "": 26, "": 27, "u": 28, "ã": 29, "": 30, "m": 31, "ă": 32, "á": 33, "": 34, "": 35, "": 36, "d": 37, "": 38, "s": 39, "": 40, "": 41, "": 42, "": 43, "": 44, "": 45, "ì": 46, "ư": 47, "": 48, "": 49, "l": 50, "j": 51, "": 52, "p": 53, "": 54, "ê": 55, "e": 56, "z": 57, "": 58, "đ": 59, "": 60, "y": 61, "è": 62, "ĩ": 63, "i": 64, "": 65, "â": 66, "ý": 67, "ò": 68, "": 69, "g": 70, "à": 71, "h": 72, "x": 73, "": 74, "í": 75, "": 76, "": 77, "ô": 78, "ự": 79, "": 80, "": 81, "": 82, "b": 83, "": 84, "": 85, "": 86, "": 87, "": 88, "": 89, "": 90, "v": 91, "n": 92, "a": 93, "|": 5, "[UNK]": 94, "[PAD]": 95}