phongdtd committed on
Commit
cc8567b
·
1 Parent(s): e8fd0f1

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +1 -1
vocab.json CHANGED
@@ -1 +1 @@
1
- {"": 0, "": 1, "w": 2, "i": 3, "h": 4, "": 5, "ế": 6, "": 7, "": 8, "ó": 9, "v": 10, "": 11, "r": 12, "ă": 13, "": 14, "": 15, "": 16, "": 17, "á": 18, "s": 19, "p": 20, "n": 21, "o": 22, "": 23, "z": 24, "": 25, "u": 26, "g": 27, "d": 28, "": 29, "q": 30, "": 31, "õ": 32, "a": 33, "ẫ": 34, "ô": 35, "": 36, "": 37, "": 38, "": 39, "x": 40, "c": 41, "": 42, "j": 43, "": 44, "": 45, "": 46, "b": 47, "": 48, "": 49, "": 50, "à": 51, "ã": 53, "": 54, "": 55, "ê": 56, "": 57, "": 58, "t": 59, "": 60, "é": 61, "ò": 62, "ú": 63, "ý": 64, "": 65, "y": 66, "ĩ": 67, "": 68, "k": 69, "": 70, "f": 71, "â": 72, "": 73, "": 74, "e": 75, "l": 76, "": 77, "": 78, "": 79, "ơ": 80, "í": 81, "": 82, "ũ": 83, "è": 84, "": 85, "ù": 86, "m": 87, "": 88, "": 89, "": 90, "ư": 91, "ì": 92, "đ": 93, "|": 52, "[UNK]": 94, "[PAD]": 95}
 
1
+ {"": 0, "": 1, "m": 2, "è": 3, "y": 4, "ă": 5, "q": 6, "": 7, "": 8, "": 9, "": 10, "": 11, "í": 12, "": 13, "s": 14, "d": 15, "": 16, "é": 17, "j": 18, "": 19, "o": 20, "": 21, "": 22, "": 23, "": 24, "": 25, "i": 26, "": 27, "ù": 28, "": 29, "": 30, "": 31, "": 32, "ã": 34, "á": 35, "": 36, "": 37, "w": 38, "v": 39, "â": 40, "ũ": 41, "ế": 42, "a": 43, "": 44, "ý": 45, "": 46, "c": 47, "": 48, "": 49, "t": 50, "ĩ": 51, "u": 52, "ệ": 53, "": 54, "": 55, "ư": 56, "r": 57, "": 58, "": 59, "à": 60, "ơ": 61, "õ": 62, "ó": 63, "": 64, "ê": 65, "e": 66, "đ": 67, "ì": 68, "": 69, "": 70, "f": 71, "l": 72, "": 73, "": 74, "": 75, "b": 76, "": 77, "p": 78, "z": 79, "ò": 80, "n": 81, "ô": 82, "k": 83, "x": 84, "": 85, "": 86, "": 87, "": 88, "h": 89, "g": 90, "": 91, "": 92, "ú": 93, "|": 33, "[UNK]": 94, "[PAD]": 95}