xtts-ru-ipa / vocab.json
omogr's picture
Upload 4 files
b93931d verified
raw
history blame
No virus
11.4 kB
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[STOP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[SPACE]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "[START]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "[ru]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "[S]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 6,
"content": "[T]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 7,
"content": "[O]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 8,
"content": "[P]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "BertNormalizer",
"clean_text": true,
"handle_chinese_chars": true,
"strip_accents": null,
"lowercase": false
},
"pre_tokenizer": {
"type": "BertPreTokenizer"
},
"post_processor": null,
"decoder": {
"type": "BPEDecoder",
"suffix": ""
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "[UNK]",
"continuing_subword_prefix": null,
"end_of_word_suffix": "",
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"[STOP]": 0,
"[UNK]": 1,
"[SPACE]": 2,
"[START]": 3,
"[ru]": 4,
"[S]": 5,
"[T]": 6,
"[O]": 7,
"[P]": 8,
"!": 9,
",": 10,
".": 11,
"?": 12,
"`": 13,
"a": 14,
"b": 15,
"d": 16,
"e": 17,
"f": 18,
"i": 19,
"j": 20,
"k": 21,
"l": 22,
"m": 23,
"n": 24,
"o": 25,
"p": 26,
"r": 27,
"s": 28,
"t": 29,
"u": 30,
"v": 31,
"x": 32,
"z": 33,
"æ": 34,
"ɐ": 35,
"ɕ": 36,
"ə": 37,
"ɛ": 38,
"ɡ": 39,
"ɨ": 40,
"ɪ": 41,
"ɫ": 42,
"ɵ": 43,
"ʂ": 44,
"ʉ": 45,
"ʊ": 46,
"ʐ": 47,
"ʲ": 48,
"ː": 49,
"ʲɪ": 50,
"lʲ": 51,
"nʲɪ": 52,
"tʲ": 53,
"sʲ": 54,
"st": 55,
"nə": 56,
"rʲɪ": 57,
"tɕ": 58,
"və": 59,
"nʲ": 60,
"jɪ": 61,
"ts": 62,
"tʲɪ": 63,
"jə": 64,
"lʲɪ": 65,
"dʲ": 66,
"tə": 67,
"rʲ": 68,
"rɐ": 69,
"kə": 70,
"je": 71,
"rə": 72,
"pɐ": 73,
"vʲ": 74,
"mʲɪ": 75,
"prʲɪ": 76,
"mʲ": 77,
"tɕɪ": 78,
"ɫə": 79,
"dʲɪ": 80,
"jʊ": 81,
"vʲɪ": 82,
"ɕː": 83,
"sʲɪ": 84,
"ɛtə": 85,
"tsɨ": 86,
"kɐ": 87,
"nɐ": 88,
"pə": 89,
"ja": 90,
"nɨ": 91,
"nː": 92,
"ʐɨ": 93,
"vɐ": 94,
"zə": 95,
"dɐ": 96,
"ɡə": 97,
"kʲɪ": 98,
"enʲɪ": 99,
"də": 100,
"sʲtʲ": 101,
"ɕːɪ": 102,
"stɐ": 103,
"ʂt": 104,
"mə": 105,
"sɐ": 106,
"tʲɪlʲ": 107,
"prɐ": 108,
"zɐ": 109,
"sə": 110,
"ʂɨ": 111,
"ɫɐ": 112,
"pʲ": 113,
"jɪt": 114,
"sʲtʲɪ": 115,
"tsə": 116,
"bʲɪ": 117,
"ɐt": 118,
"pʲɪ": 119,
"as": 120,
"nːə": 121,
"ak": 122,
"sɫ": 123,
"jɪv": 124,
"zʲ": 125,
"stvə": 126,
"jɪm": 127,
"ɐr": 128,
"aɫ": 129,
"atsɨ": 130,
"rʊ": 131,
"et": 132,
"ovə": 133,
"nʲɪjə": 134,
"dʊ": 135,
"tɐ": 136,
"vɨ": 137,
"mɐ": 138,
"skə": 139,
"olʲ": 140,
"pr": 141,
"xɐ": 142,
"prə": 143,
"alʲ": 144,
"bʲ": 145,
"on": 146,
"itʲɪlʲ": 147,
"sʊ": 148,
"sʲə": 149,
"er": 150,
"nːɨ": 151,
"ot": 152,
"mʊ": 153,
"oj": 154,
"am": 155,
"at": 156,
"kʊ": 157,
"fsʲ": 158,
"atʲ": 159,
"ɐb": 160,
"prʲɪt": 161,
"an": 162,
"ej": 163,
"tsː": 164,
"nəjə": 165,
"enʲɪjə": 166,
"ɡʲɪ": 167,
"ʂto": 168,
"jɪtsə": 169,
"bɨ": 170,
"skʲɪ": 171,
"tʲe": 172,
"or": 173,
"jɵ": 174,
"kʲ": 175,
"dlʲ": 176,
"itɕɪ": 177,
"nəvə": 178,
"en": 179,
"fʲɪ": 180,
"ilʲɪ": 181,
"oɫ": 182,
"ɡɐ": 183,
"vlʲ": 184,
"stvʲɪ": 185,
"ar": 186,
"tsːə": 187,
"ɫʐ": 188,
"aɫə": 189,
"ɪz": 190,
"nʊ": 191,
"ut": 192,
"jʉ": 193,
"nəj": 194,
"nəsʲtʲɪ": 195,
"mɨ": 196,
"dɐɫʐ": 197,
"enʲɪje": 198,
"ɡr": 199,
"nɐr": 200,
"itʲ": 201,
"od": 202,
"mʲɪnʲ": 203,
"prʲɪtstɐ": 204,
"blʲ": 205,
"iɫ": 206,
"sʲlʲ": 207,
"tɨ": 208,
"nɨj": 209,
"sk": 210,
"aʂɨ": 211,
"ɛnʲɪ": 212,
"eɫə": 213,
"in": 214,
"dlʲa": 215,
"jɪvlʲ": 216,
"atsɨɪ": 217,
"alʲɪ": 218,
"op": 219,
"ix": 220,
"mnʲ": 221,
"prʲ": 222,
"nʲɪm": 223,
"rɐb": 224,
"ɛnʲɪjə": 225,
"ɐn": 226,
"pʲɪrʲɪ": 227,
"stə": 228,
"ajʊ": 229,
"ætʲ": 230,
"ɡʲ": 231,
"ɪmʲ": 232,
"jʉɕːɪ": 233,
"oʐɨ": 234,
"xɐdʲ": 235,
"zn": 236,
"vʊ": 237,
"om": 238,
"ʐnə": 239,
"anʲɪ": 240,
"bə": 241,
"nɨje": 242,
"nɨx": 243,
"ɛtʲɪ": 244,
"prʲɪtstɐvʲ": 245,
"trʲ": 246,
"kɐz": 247,
"ajɪt": 248,
"kr": 249,
"svɐ": 250,
"str": 251,
"tsɨɪ": 252,
"otɕɪ": 253,
"atə": 254,
"xə": 255,
"jæ": 256,
"ajɪm": 257,
"pɐdʲ": 258,
"ɨtʲ": 259,
"ɐd": 260,
"tʊ": 261,
"irə": 262,
"spʲ": 263,
"vɐrʲ": 264,
"sp": 265,
"ʐdʊ": 266,
"orə": 267,
"vʲɪr": 268,
"vətʲ": 269,
"dɐɫʐn": 270,
"dʲɪnʲ": 271,
"nːɨx": 272,
"bɐ": 273,
"stʊ": 274,
"imə": 275,
"æjɪtsə": 276,
"em": 277,
"tr": 278,
"zʲɪ": 279,
"æjʊ": 280,
"sʲɪtɕ": 281,
"ejɪt": 282,
"im": 283,
"nɨm": 284,
"zɨ": 285,
"nʲɪə": 286,
"ob": 287,
"jɪdʲɪnʲ": 288,
"dʲnʲɪ": 289,
"ʊtɕ": 290,
"rʲɪʂ": 291,
"ʂə": 292,
"aɡ": 293
},
"merges": [
"ʲ ɪ",
"l ʲ",
"n ʲɪ",
"t ʲ",
"s ʲ",
"s t",
"n ə",
"r ʲɪ",
"t ɕ",
"v ə",
"n ʲ",
"j ɪ",
"t s",
"t ʲɪ",
"j ə",
"l ʲɪ",
"d ʲ",
"t ə",
"r ʲ",
"r ɐ",
"k ə",
"j e",
"r ə",
"p ɐ",
"v ʲ",
"m ʲɪ",
"p rʲɪ",
"m ʲ",
"tɕ ɪ",
"ɫ ə",
"d ʲɪ",
"j ʊ",
"v ʲɪ",
"ɕ ː",
"s ʲɪ",
"ɛ tə",
"ts ɨ",
"k ɐ",
"n ɐ",
"p ə",
"j a",
"n ɨ",
"n ː",
"ʐ ɨ",
"v ɐ",
"z ə",
"d ɐ",
"ɡ ə",
"k ʲɪ",
"e nʲɪ",
"d ə",
"sʲ tʲ",
"ɕː ɪ",
"st ɐ",
"ʂ t",
"m ə",
"s ɐ",
"tʲɪ lʲ",
"p rɐ",
"z ɐ",
"s ə",
"ʂ ɨ",
"ɫ ɐ",
"p ʲ",
"jɪ t",
"sʲ tʲɪ",
"ts ə",
"b ʲɪ",
"ɐ t",
"p ʲɪ",
"a s",
"nː ə",
"a k",
"s ɫ",
"jɪ v",
"z ʲ",
"st və",
"jɪ m",
"ɐ r",
"a ɫ",
"a tsɨ",
"r ʊ",
"e t",
"o və",
"nʲɪ jə",
"d ʊ",
"t ɐ",
"v ɨ",
"m ɐ",
"s kə",
"o lʲ",
"p r",
"x ɐ",
"p rə",
"a lʲ",
"b ʲ",
"o n",
"i tʲɪlʲ",
"s ʊ",
"sʲ ə",
"e r",
"nː ɨ",
"o t",
"m ʊ",
"o j",
"a m",
"a t",
"k ʊ",
"f sʲ",
"a tʲ",
"ɐ b",
"prʲɪ t",
"a n",
"e j",
"ts ː",
"nə jə",
"enʲɪ jə",
"ɡ ʲɪ",
"ʂt o",
"jɪ tsə",
"b ɨ",
"s kʲɪ",
"tʲ e",
"o r",
"j ɵ",
"k ʲ",
"d lʲ",
"i tɕɪ",
"nə və",
"e n",
"f ʲɪ",
"i lʲɪ",
"o ɫ",
"ɡ ɐ",
"v lʲ",
"st vʲɪ",
"a r",
"tsː ə",
"ɫ ʐ",
"a ɫə",
"ɪ z",
"n ʊ",
"u t",
"j ʉ",
"nə j",
"nə sʲtʲɪ",
"m ɨ",
"dɐ ɫʐ",
"enʲɪ je",
"ɡ r",
"nɐ r",
"i tʲ",
"o d",
"mʲɪ nʲ",
"prʲɪt stɐ",
"b lʲ",
"i ɫ",
"sʲ lʲ",
"t ɨ",
"nɨ j",
"s k",
"a ʂɨ",
"ɛ nʲɪ",
"e ɫə",
"i n",
"dlʲ a",
"jɪv lʲ",
"atsɨ ɪ",
"a lʲɪ",
"o p",
"i x",
"m nʲ",
"p rʲ",
"nʲɪ m",
"rɐ b",
"ɛ nʲɪjə",
"ɐ n",
"pʲɪ rʲɪ",
"st ə",
"a jʊ",
"æ tʲ",
"ɡ ʲ",
"ɪ mʲ",
"jʉ ɕːɪ",
"o ʐɨ",
"xɐ dʲ",
"z n",
"v ʊ",
"o m",
"ʐ nə",
"a nʲɪ",
"b ə",
"nɨ je",
"nɨ x",
"ɛ tʲɪ",
"prʲɪtstɐ vʲ",
"t rʲ",
"kɐ z",
"a jɪt",
"k r",
"s vɐ",
"st r",
"tsɨ ɪ",
"o tɕɪ",
"a tə",
"x ə",
"j æ",
"a jɪm",
"pɐ dʲ",
"ɨ tʲ",
"ɐ d",
"t ʊ",
"i rə",
"s pʲ",
"vɐ rʲ",
"s p",
"ʐ dʊ",
"o rə",
"vʲɪ r",
"və tʲ",
"dɐɫʐ n",
"dʲɪ nʲ",
"nːɨ x",
"b ɐ",
"st ʊ",
"i mə",
"æ jɪtsə",
"e m",
"t r",
"z ʲɪ",
"æ jʊ",
"sʲɪ tɕ",
"e jɪt",
"i m",
"nɨ m",
"z ɨ",
"nʲɪ ə",
"o b",
"jɪ dʲɪnʲ",
"dʲ nʲɪ",
"ʊ tɕ",
"rʲɪ ʂ",
"ʂ ə",
"a ɡ"
]
}
}