phongdtd committed on
Commit
0980f23
·
1 Parent(s): 147d9be

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ỹ": 0, "f": 1, "ự": 2, "ỷ": 3, "ế": 4, "à": 5, "ỗ": 6, "b": 7, "ư": 8, "c": 9, "ẽ": 10, "õ": 11, "u": 12, "ẻ": 13, "ô": 14, "ỉ": 15, "á": 16, "ớ": 17, "v": 18, "ỡ": 19, "â": 21, "í": 22, "ơ": 23, "ắ": 24, "h": 25, "ấ": 26, "ằ": 27, "ệ": 28, "ù": 29, "m": 30, "ê": 31, "ầ": 32, "ữ": 33, "ề": 34, "ể": 35, "ễ": 36, "z": 37, "ý": 38, "x": 39, "e": 40, "o": 41, "ì": 42, "n": 43, "ậ": 44, "ọ": 45, "ặ": 46, "ả": 47, "g": 48, "w": 49, "s": 50, "è": 51, "ị": 52, "d": 53, "ó": 54, "ĩ": 55, "ụ": 56, "ộ": 57, "ủ": 58, "ỳ": 59, "p": 60, "ứ": 61, "ẩ": 62, "r": 63, "ỵ": 64, "ợ": 65, "ẵ": 66, "é": 67, "ũ": 68, "ò": 69, "ừ": 70, "i": 71, "ạ": 72, "ẳ": 73, "y": 74, "t": 75, "ổ": 76, "ú": 77, "j": 78, "ồ": 79, "k": 80, "l": 81, "q": 82, "ố": 83, "a": 84, "ã": 85, "ẫ": 86, "ử": 87, "ở": 88, "ờ": 89, "ẹ": 90, "ỏ": 91, "đ": 92, "ă": 93, "|": 20, "[UNK]": 94, "[PAD]": 95}