phongdtd committed on
Commit
e57eece
·
1 Parent(s): ad14892

add tokenizer

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. vocab.json +1 -1
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
vocab.json CHANGED
@@ -1 +1 @@
1
- {"o": 0, "ơ": 1, "ế": 2, "": 3, "ũ": 4, "": 6, "k": 7, "": 8, "f": 9, "t": 10, "ó": 11, "ợ": 12, "q": 13, "": 14, "c": 15, "": 16, "w": 17, "r": 18, "": 19, "ú": 20, "é": 21, "õ": 22, "": 23, "": 24, "ù": 25, "": 26, "": 27, "u": 28, "ã": 29, "": 30, "m": 31, "ă": 32, "á": 33, "": 34, "": 35, "": 36, "d": 37, "": 38, "s": 39, "": 40, "": 41, "": 42, "": 43, "": 44, "": 45, "ì": 46, "ư": 47, "": 48, "": 49, "l": 50, "j": 51, "": 52, "p": 53, "": 54, "ê": 55, "e": 56, "z": 57, "": 58, "đ": 59, "": 60, "y": 61, "è": 62, "ĩ": 63, "i": 64, "": 65, "â": 66, "ý": 67, "ò": 68, "": 69, "g": 70, "à": 71, "h": 72, "x": 73, "": 74, "í": 75, "": 76, "": 77, "ô": 78, "": 79, "": 80, "": 81, "": 82, "b": 83, "": 84, "": 85, "": 86, "ể": 87, "": 88, "": 89, "": 90, "v": 91, "n": 92, "a": 93, "|": 5, "[UNK]": 94, "[PAD]": 95}
 
1
+ {"": 0, "": 1, "": 2, "d": 3, "ơ": 4, "w": 5, "s": 6, "ũ": 7, "c": 8, "é": 9, "": 10, "í": 11, "ợ": 12, "": 13, "": 14, "": 15, "ô": 16, "ù": 17, "r": 18, "m": 19, "": 20, "â": 21, "j": 22, "": 23, "": 24, "ì": 25, "ý": 26, "": 27, "g": 28, "": 29, "": 30, "h": 31, "": 32, "": 33, "": 34, "e": 35, "à": 36, "": 37, "l": 38, "z": 39, "è": 40, "ă": 41, "": 42, "": 43, "ê": 44, "": 45, "": 46, "u": 47, "ĩ": 48, "õ": 49, "b": 50, "á": 51, "": 52, "o": 53, "": 54, "": 55, "": 56, "": 57, "": 58, "": 59, "k": 60, "": 61, "": 62, "": 63, "": 64, "": 65, "p": 66, "": 67, "ã": 68, "ế": 69, "a": 70, "ó": 71, "": 72, "x": 73, "n": 74, "t": 75, "ú": 76, "f": 77, "": 78, "q": 79, "": 80, "v": 81, "": 82, "ò": 83, "": 84, "": 85, "đ": 87, "": 88, "ư": 89, "y": 90, "": 91, "i": 92, "": 93, "|": 86, "[UNK]": 94, "[PAD]": 95}