phongdtd commited on
Commit
adbe339
·
1 Parent(s): 3f682c7

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +1 -1
vocab.json CHANGED
@@ -1 +1 @@
1
- {"g": 0, "": 1, "": 2, "i": 3, "ã": 4, "â": 5, "": 6, "": 7, "n": 8, "": 9, "": 10, "í": 11, "": 12, "": 13, "": 14, "": 15, "õ": 16, "ó": 17, "ơ": 18, "đ": 19, "y": 20, "": 21, "": 22, "": 23, "": 24, "ỵ": 25, "": 26, "l": 27, "à": 28, "": 29, "": 30, "ú": 31, "ũ": 32, "": 33, "u": 34, "w": 35, "ĩ": 36, "": 37, "b": 38, "f": 39, "": 40, "ý": 41, "": 42, "": 43, "": 44, "": 45, "ế": 46, "": 47, "": 48, "": 49, "": 50, "è": 51, "": 52, "k": 53, "m": 55, "d": 56, "": 57, "c": 58, "ì": 59, "o": 60, "x": 61, "z": 62, "ê": 63, "e": 64, "á": 65, "": 66, "": 67, "h": 68, "ô": 69, "": 70, "p": 71, "": 72, "ă": 73, "ò": 74, "v": 75, "": 76, "é": 77, "": 78, "": 79, "r": 80, "ù": 81, "": 82, "s": 83, "": 84, "t": 85, "": 86, "": 87, "j": 88, "": 89, "q": 90, "ư": 91, "a": 92, "": 93, "|": 54, "[UNK]": 94, "[PAD]": 95}
 
1
+ {"ư": 0, "ý": 1, "": 2, "": 3, "ê": 4, "ó": 5, "": 6, "í": 7, "": 8, "c": 9, "t": 10, "": 11, "n": 12, "": 13, "é": 14, "u": 15, "ă": 16, "h": 17, "e": 18, "": 19, "": 20, "ù": 21, "a": 22, "è": 23, "": 24, "ỵ": 25, "": 26, "": 27, "y": 28, "": 29, "l": 30, "": 31, "ũ": 32, "i": 33, "x": 34, "": 35, "á": 36, "": 37, "r": 38, "": 39, "": 40, "b": 41, "": 42, "": 43, "": 44, "ĩ": 46, "": 47, "": 48, "": 49, "s": 50, "": 51, "g": 52, "ơ": 53, "": 54, "ú": 55, "": 56, "v": 57, "f": 58, "": 59, "o": 60, "ì": 61, "ế": 62, "": 63, "đ": 64, "": 65, "": 66, "": 67, "": 68, "": 69, "ò": 70, "ã": 71, "": 72, "õ": 73, "": 74, "z": 75, "": 76, "w": 77, "": 78, "": 79, "": 80, "k": 81, "": 82, "â": 83, "m": 84, "j": 85, "d": 86, "à": 87, "": 88, "ô": 89, "q": 90, "p": 91, "": 92, "": 93, "|": 45, "[UNK]": 94, "[PAD]": 95}