{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[STOP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[SPACE]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[START]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[ru]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "[S]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "[T]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 7, "content": "[O]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 8, "content": "[P]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "BertNormalizer", "clean_text": true, "handle_chinese_chars": true, "strip_accents": null, "lowercase": false }, "pre_tokenizer": { "type": "BertPreTokenizer" }, "post_processor": null, "decoder": { "type": "BPEDecoder", "suffix": "" }, "model": { "type": "BPE", "dropout": null, "unk_token": "[UNK]", "continuing_subword_prefix": null, "end_of_word_suffix": "", "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "[STOP]": 0, "[UNK]": 1, "[SPACE]": 2, "[START]": 3, "[ru]": 4, "[S]": 5, "[T]": 6, "[O]": 7, "[P]": 8, "!": 9, ",": 10, ".": 11, "?": 12, "`": 13, "a": 14, "b": 15, "d": 16, "e": 17, "f": 18, "i": 19, "j": 20, "k": 21, "l": 22, "m": 23, "n": 24, "o": 25, "p": 26, "r": 27, "s": 28, "t": 29, "u": 30, "v": 31, "x": 32, "z": 33, "æ": 34, "ɐ": 35, "ɕ": 36, "ə": 37, "ɛ": 38, "ɡ": 39, "ɨ": 40, "ɪ": 41, "ɫ": 42, "ɵ": 43, "ʂ": 44, "ʉ": 45, "ʊ": 46, "ʐ": 47, "ʲ": 48, "ː": 49, "ʲɪ": 50, "lʲ": 51, "nʲɪ": 52, "tʲ": 53, "sʲ": 54, "st": 55, "nə": 56, "rʲɪ": 57, "tɕ": 58, "və": 59, "nʲ": 60, "jɪ": 61, "ts": 62, "tʲɪ": 63, "jə": 64, "lʲɪ": 65, "dʲ": 66, "tə": 67, "rʲ": 68, "rɐ": 69, "kə": 70, "je": 71, "rə": 72, "pɐ": 73, "vʲ": 74, "mʲɪ": 75, "prʲɪ": 76, "mʲ": 77, "tɕɪ": 78, "ɫə": 79, "dʲɪ": 80, "jʊ": 81, "vʲɪ": 82, "ɕː": 83, "sʲɪ": 84, "ɛtə": 85, "tsɨ": 86, "kɐ": 87, "nɐ": 88, "pə": 89, "ja": 90, "nɨ": 91, "nː": 92, "ʐɨ": 93, "vɐ": 94, "zə": 95, "dɐ": 96, "ɡə": 97, "kʲɪ": 98, "enʲɪ": 99, "də": 100, "sʲtʲ": 101, "ɕːɪ": 102, "stɐ": 103, "ʂt": 104, "mə": 105, "sɐ": 106, "tʲɪlʲ": 107, "prɐ": 108, "zɐ": 109, "sə": 110, "ʂɨ": 111, "ɫɐ": 112, "pʲ": 113, "jɪt": 114, "sʲtʲɪ": 115, "tsə": 116, "bʲɪ": 117, "ɐt": 118, "pʲɪ": 119, "as": 120, "nːə": 121, "ak": 122, "sɫ": 123, "jɪv": 124, "zʲ": 125, "stvə": 126, "jɪm": 127, "ɐr": 128, "aɫ": 129, "atsɨ": 130, "rʊ": 131, "et": 132, "ovə": 133, "nʲɪjə": 134, "dʊ": 135, "tɐ": 136, "vɨ": 137, "mɐ": 138, "skə": 139, "olʲ": 140, "pr": 141, "xɐ": 142, "prə": 143, "alʲ": 144, "bʲ": 145, "on": 146, "itʲɪlʲ": 147, "sʊ": 148, "sʲə": 149, "er": 150, "nːɨ": 151, "ot": 152, "mʊ": 153, "oj": 154, "am": 155, "at": 156, "kʊ": 157, "fsʲ": 158, "atʲ": 159, "ɐb": 160, "prʲɪt": 161, "an": 162, "ej": 163, "tsː": 164, "nəjə": 165, "enʲɪjə": 166, "ɡʲɪ": 167, "ʂto": 168, "jɪtsə": 169, "bɨ": 170, "skʲɪ": 171, "tʲe": 172, "or": 173, "jɵ": 174, "kʲ": 175, "dlʲ": 176, "itɕɪ": 177, "nəvə": 178, "en": 179, "fʲɪ": 180, "ilʲɪ": 181, "oɫ": 182, "ɡɐ": 183, "vlʲ": 184, "stvʲɪ": 185, "ar": 186, "tsːə": 187, "ɫʐ": 188, "aɫə": 189, "ɪz": 190, "nʊ": 191, "ut": 192, "jʉ": 193, "nəj": 194, "nəsʲtʲɪ": 195, "mɨ": 196, "dɐɫʐ": 197, "enʲɪje": 198, "ɡr": 199, "nɐr": 200, "itʲ": 201, "od": 202, "mʲɪnʲ": 203, "prʲɪtstɐ": 204, "blʲ": 205, "iɫ": 206, "sʲlʲ": 207, "tɨ": 208, "nɨj": 209, "sk": 210, "aʂɨ": 211, "ɛnʲɪ": 212, "eɫə": 213, "in": 214, "dlʲa": 215, "jɪvlʲ": 216, "atsɨɪ": 217, "alʲɪ": 218, "op": 219, "ix": 220, "mnʲ": 221, "prʲ": 222, "nʲɪm": 223, "rɐb": 224, "ɛnʲɪjə": 225, "ɐn": 226, "pʲɪrʲɪ": 227, "stə": 228, "ajʊ": 229, "ætʲ": 230, "ɡʲ": 231, "ɪmʲ": 232, "jʉɕːɪ": 233, "oʐɨ": 234, "xɐdʲ": 235, "zn": 236, "vʊ": 237, "om": 238, "ʐnə": 239, "anʲɪ": 240, "bə": 241, "nɨje": 242, "nɨx": 243, "ɛtʲɪ": 244, "prʲɪtstɐvʲ": 245, "trʲ": 246, "kɐz": 247, "ajɪt": 248, "kr": 249, "svɐ": 250, "str": 251, "tsɨɪ": 252, "otɕɪ": 253, "atə": 254, "xə": 255, "jæ": 256, "ajɪm": 257, "pɐdʲ": 258, "ɨtʲ": 259, "ɐd": 260, "tʊ": 261, "irə": 262, "spʲ": 263, "vɐrʲ": 264, "sp": 265, "ʐdʊ": 266, "orə": 267, "vʲɪr": 268, "vətʲ": 269, "dɐɫʐn": 270, "dʲɪnʲ": 271, "nːɨx": 272, "bɐ": 273, "stʊ": 274, "imə": 275, "æjɪtsə": 276, "em": 277, "tr": 278, "zʲɪ": 279, "æjʊ": 280, "sʲɪtɕ": 281, "ejɪt": 282, "im": 283, "nɨm": 284, "zɨ": 285, "nʲɪə": 286, "ob": 287, "jɪdʲɪnʲ": 288, "dʲnʲɪ": 289, "ʊtɕ": 290, "rʲɪʂ": 291, "ʂə": 292, "aɡ": 293 }, "merges": [ "ʲ ɪ", "l ʲ", "n ʲɪ", "t ʲ", "s ʲ", "s t", "n ə", "r ʲɪ", "t ɕ", "v ə", "n ʲ", "j ɪ", "t s", "t ʲɪ", "j ə", "l ʲɪ", "d ʲ", "t ə", "r ʲ", "r ɐ", "k ə", "j e", "r ə", "p ɐ", "v ʲ", "m ʲɪ", "p rʲɪ", "m ʲ", "tɕ ɪ", "ɫ ə", "d ʲɪ", "j ʊ", "v ʲɪ", "ɕ ː", "s ʲɪ", "ɛ tə", "ts ɨ", "k ɐ", "n ɐ", "p ə", "j a", "n ɨ", "n ː", "ʐ ɨ", "v ɐ", "z ə", "d ɐ", "ɡ ə", "k ʲɪ", "e nʲɪ", "d ə", "sʲ tʲ", "ɕː ɪ", "st ɐ", "ʂ t", "m ə", "s ɐ", "tʲɪ lʲ", "p rɐ", "z ɐ", "s ə", "ʂ ɨ", "ɫ ɐ", "p ʲ", "jɪ t", "sʲ tʲɪ", "ts ə", "b ʲɪ", "ɐ t", "p ʲɪ", "a s", "nː ə", "a k", "s ɫ", "jɪ v", "z ʲ", "st və", "jɪ m", "ɐ r", "a ɫ", "a tsɨ", "r ʊ", "e t", "o və", "nʲɪ jə", "d ʊ", "t ɐ", "v ɨ", "m ɐ", "s kə", "o lʲ", "p r", "x ɐ", "p rə", "a lʲ", "b ʲ", "o n", "i tʲɪlʲ", "s ʊ", "sʲ ə", "e r", "nː ɨ", "o t", "m ʊ", "o j", "a m", "a t", "k ʊ", "f sʲ", "a tʲ", "ɐ b", "prʲɪ t", "a n", "e j", "ts ː", "nə jə", "enʲɪ jə", "ɡ ʲɪ", "ʂt o", "jɪ tsə", "b ɨ", "s kʲɪ", "tʲ e", "o r", "j ɵ", "k ʲ", "d lʲ", "i tɕɪ", "nə və", "e n", "f ʲɪ", "i lʲɪ", "o ɫ", "ɡ ɐ", "v lʲ", "st vʲɪ", "a r", "tsː ə", "ɫ ʐ", "a ɫə", "ɪ z", "n ʊ", "u t", "j ʉ", "nə j", "nə sʲtʲɪ", "m ɨ", "dɐ ɫʐ", "enʲɪ je", "ɡ r", "nɐ r", "i tʲ", "o d", "mʲɪ nʲ", "prʲɪt stɐ", "b lʲ", "i ɫ", "sʲ lʲ", "t ɨ", "nɨ j", "s k", "a ʂɨ", "ɛ nʲɪ", "e ɫə", "i n", "dlʲ a", "jɪv lʲ", "atsɨ ɪ", "a lʲɪ", "o p", "i x", "m nʲ", "p rʲ", "nʲɪ m", "rɐ b", "ɛ nʲɪjə", "ɐ n", "pʲɪ rʲɪ", "st ə", "a jʊ", "æ tʲ", "ɡ ʲ", "ɪ mʲ", "jʉ ɕːɪ", "o ʐɨ", "xɐ dʲ", "z n", "v ʊ", "o m", "ʐ nə", "a nʲɪ", "b ə", "nɨ je", "nɨ x", "ɛ tʲɪ", "prʲɪtstɐ vʲ", "t rʲ", "kɐ z", "a jɪt", "k r", "s vɐ", "st r", "tsɨ ɪ", "o tɕɪ", "a tə", "x ə", "j æ", "a jɪm", "pɐ dʲ", "ɨ tʲ", "ɐ d", "t ʊ", "i rə", "s pʲ", "vɐ rʲ", "s p", "ʐ dʊ", "o rə", "vʲɪ r", "və tʲ", "dɐɫʐ n", "dʲɪ nʲ", "nːɨ x", "b ɐ", "st ʊ", "i mə", "æ jɪtsə", "e m", "t r", "z ʲɪ", "æ jʊ", "sʲɪ tɕ", "e jɪt", "i m", "nɨ m", "z ɨ", "nʲɪ ə", "o b", "jɪ dʲɪnʲ", "dʲ nʲɪ", "ʊ tɕ", "rʲɪ ʂ", "ʂ ə", "a ɡ" ] } }