add tokenizer
Browse files- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"R": 0, "CH": 1, "B": 2, "I:": 3, "W": 4, "BY": 5, "Q": 6, "I": 7, "U:": 8, "A:": 9, "U": 10, "HY": 11, "A": 12, "E": 13, "TS": 14, "O": 15, "Y": 16, "D": 17, "G": 18, "F": 19, "O:": 20, "J": 21, "E:": 22, "P": 23, "S": 24, "RY": 25, "TY": 26, "KY": 27, "Z": 28, "K": 29, "N!": 30, "GY": 31, "M": 32, "T": 33, "N!:": 34, "PY": 35, "SH": 36, "Q:": 37, "H": 38, "[UNK]": 39, "[PAD]": 40, " ": 41, "|": 42}
|