phongdtd commited on
Commit
e8fd0f1
·
1 Parent(s): da8a330

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +1 -1
vocab.json CHANGED
@@ -1 +1 @@
1
- {"": 0, "ũ": 1, "": 2, "": 3, "ò": 4, "ô": 5, "ú": 6, "â": 7, "": 8, "b": 9, "": 10, "r": 11, "": 12, "j": 13, "": 14, "a": 15, "ù": 16, "x": 17, "": 18, "ă": 19, "": 20, "p": 21, "à": 22, "ộ": 23, "ế": 24, "m": 25, "ê": 26, "": 27, "ơ": 28, "": 29, "w": 30, "": 31, "": 32, "ư": 33, "e": 34, "k": 35, "": 36, "d": 37, "": 38, "õ": 39, "v": 40, "t": 41, "s": 42, "c": 43, "q": 44, "": 45, "": 46, "": 47, "": 48, "": 49, "": 50, "ó": 51, "": 52, "ồ": 53, "ắ": 54, "f": 55, "á": 56, "": 57, "": 58, "": 59, "": 60, "": 61, "": 62, "è": 63, "": 64, "": 65, "": 66, "i": 67, "z": 68, "é": 69, "ì": 70, "đ": 71, "ý": 72, "ã": 74, "": 75, "": 76, "": 77, "y": 78, "ĩ": 79, "í": 80, "": 81, "": 82, "": 83, "": 84, "n": 85, "u": 86, "h": 87, "o": 88, "l": 89, "": 90, "": 91, "": 92, "g": 93, "|": 73, "[UNK]": 94, "[PAD]": 95}
 
1
+ {"": 0, "": 1, "w": 2, "i": 3, "h": 4, "": 5, "ế": 6, "": 7, "": 8, "ó": 9, "v": 10, "": 11, "r": 12, "ă": 13, "": 14, "": 15, "": 16, "": 17, "á": 18, "s": 19, "p": 20, "n": 21, "o": 22, "ộ": 23, "z": 24, "": 25, "u": 26, "g": 27, "d": 28, "": 29, "q": 30, "": 31, "õ": 32, "a": 33, "": 34, "ô": 35, "": 36, "": 37, "": 38, "": 39, "x": 40, "c": 41, "": 42, "j": 43, "": 44, "": 45, "": 46, "b": 47, "": 48, "": 49, "": 50, "à": 51, "ã": 53, "ồ": 54, "": 55, "ê": 56, "": 57, "": 58, "t": 59, "": 60, "é": 61, "ò": 62, "ú": 63, "ý": 64, "": 65, "y": 66, "ĩ": 67, "": 68, "k": 69, "": 70, "f": 71, "â": 72, "": 73, "ấ": 74, "e": 75, "l": 76, "": 77, "": 78, "": 79, "ơ": 80, "í": 81, "": 82, "ũ": 83, "è": 84, "": 85, "ù": 86, "m": 87, "": 88, "": 89, "": 90, "ư": 91, "ì": 92, "đ": 93, "|": 52, "[UNK]": 94, "[PAD]": 95}