nawta committed on
Commit
0ac3838
·
1 Parent(s): 1cdd7fe

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"R": 0, "CH": 1, "B": 2, "I:": 3, "W": 4, "BY": 5, "Q": 6, "I": 7, "U:": 8, "A:": 9, "U": 10, "HY": 11, "A": 12, "E": 13, "TS": 14, "O": 15, "Y": 16, "D": 17, "G": 18, "F": 19, "O:": 20, "J": 21, "E:": 22, "P": 23, "S": 24, "RY": 25, "TY": 26, "KY": 27, "Z": 28, "K": 29, "N!": 30, "GY": 31, "M": 32, "T": 33, "N!:": 34, "PY": 35, "SH": 36, "Q:": 37, "H": 38, "[UNK]": 39, "[PAD]": 40, " ": 41, "|": 42}