{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 625, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 626, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 627, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 628, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 629, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "BertNormalizer", "clean_text": true, "handle_chinese_chars": true, "strip_accents": null, "lowercase": false }, "pre_tokenizer": { "type": "BertPreTokenizer" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 1 } } ], "special_tokens": { "[CLS]": { "id": "[CLS]", "ids": [ 626 ], "tokens": [ "[CLS]" ] }, "[SEP]": { "id": "[SEP]", "ids": [ 628 ], "tokens": [ "[SEP]" ] } } }, "decoder": { "type": "WordPiece", "prefix": "##", "cleanup": true }, "model": { "type": "WordPiece", "unk_token": "[UNK]", "continuing_subword_prefix": "##", "max_input_chars_per_word": 100, "vocab": { "AAAA": 0, "AAAT": 1, "AAAC": 2, "AAAG": 3, "AAAN": 4, "AATA": 5, "AATT": 6, "AATC": 7, "AATG": 8, "AATN": 9, "AACA": 10, "AACT": 11, "AACC": 12, "AACG": 13, "AACN": 14, "AAGA": 15, "AAGT": 16, "AAGC": 17, "AAGG": 18, "AAGN": 19, "AANA": 20, "AANT": 21, "AANC": 22, "AANG": 23, "AANN": 24, "ATAA": 25, "ATAT": 26, "ATAC": 27, "ATAG": 28, "ATAN": 29, "ATTA": 30, "ATTT": 31, "ATTC": 32, "ATTG": 33, "ATTN": 34, "ATCA": 35, "ATCT": 36, "ATCC": 37, "ATCG": 38, "ATCN": 39, "ATGA": 40, "ATGT": 41, "ATGC": 42, "ATGG": 43, "ATGN": 44, "ATNA": 45, "ATNT": 46, "ATNC": 47, "ATNG": 48, "ATNN": 49, "ACAA": 50, "ACAT": 51, "ACAC": 52, "ACAG": 53, "ACAN": 54, "ACTA": 55, "ACTT": 56, "ACTC": 57, "ACTG": 58, "ACTN": 59, "ACCA": 60, "ACCT": 61, "ACCC": 62, "ACCG": 63, "ACCN": 64, "ACGA": 65, "ACGT": 66, "ACGC": 67, "ACGG": 68, "ACGN": 69, "ACNA": 70, "ACNT": 71, "ACNC": 72, "ACNG": 73, "ACNN": 74, "AGAA": 75, "AGAT": 76, "AGAC": 77, "AGAG": 78, "AGAN": 79, "AGTA": 80, "AGTT": 81, "AGTC": 82, "AGTG": 83, "AGTN": 84, "AGCA": 85, "AGCT": 86, "AGCC": 87, "AGCG": 88, "AGCN": 89, "AGGA": 90, "AGGT": 91, "AGGC": 92, "AGGG": 93, "AGGN": 94, "AGNA": 95, "AGNT": 96, "AGNC": 97, "AGNG": 98, "AGNN": 99, "ANAA": 100, "ANAT": 101, "ANAC": 102, "ANAG": 103, "ANAN": 104, "ANTA": 105, "ANTT": 106, "ANTC": 107, "ANTG": 108, "ANTN": 109, "ANCA": 110, "ANCT": 111, "ANCC": 112, "ANCG": 113, "ANCN": 114, "ANGA": 115, "ANGT": 116, "ANGC": 117, "ANGG": 118, "ANGN": 119, "ANNA": 120, "ANNT": 121, "ANNC": 122, "ANNG": 123, "ANNN": 124, "TAAA": 125, "TAAT": 126, "TAAC": 127, "TAAG": 128, "TAAN": 129, "TATA": 130, "TATT": 131, "TATC": 132, "TATG": 133, "TATN": 134, "TACA": 135, "TACT": 136, "TACC": 137, "TACG": 138, "TACN": 139, "TAGA": 140, "TAGT": 141, "TAGC": 142, "TAGG": 143, "TAGN": 144, "TANA": 145, "TANT": 146, "TANC": 147, "TANG": 148, "TANN": 149, "TTAA": 150, "TTAT": 151, "TTAC": 152, "TTAG": 153, "TTAN": 154, "TTTA": 155, "TTTT": 156, "TTTC": 157, "TTTG": 158, "TTTN": 159, "TTCA": 160, "TTCT": 161, "TTCC": 162, "TTCG": 163, "TTCN": 164, "TTGA": 165, "TTGT": 166, "TTGC": 167, "TTGG": 168, "TTGN": 169, "TTNA": 170, "TTNT": 171, "TTNC": 172, "TTNG": 173, "TTNN": 174, "TCAA": 175, "TCAT": 176, "TCAC": 177, "TCAG": 178, "TCAN": 179, "TCTA": 180, "TCTT": 181, "TCTC": 182, "TCTG": 183, "TCTN": 184, "TCCA": 185, "TCCT": 186, "TCCC": 187, "TCCG": 188, "TCCN": 189, "TCGA": 190, "TCGT": 191, "TCGC": 192, "TCGG": 193, "TCGN": 194, "TCNA": 195, "TCNT": 196, "TCNC": 197, "TCNG": 198, "TCNN": 199, "TGAA": 200, "TGAT": 201, "TGAC": 202, "TGAG": 203, "TGAN": 204, "TGTA": 205, "TGTT": 206, "TGTC": 207, "TGTG": 208, "TGTN": 209, "TGCA": 210, "TGCT": 211, "TGCC": 212, "TGCG": 213, "TGCN": 214, "TGGA": 215, "TGGT": 216, "TGGC": 217, "TGGG": 218, "TGGN": 219, "TGNA": 220, "TGNT": 221, "TGNC": 222, "TGNG": 223, "TGNN": 224, "TNAA": 225, "TNAT": 226, "TNAC": 227, "TNAG": 228, "TNAN": 229, "TNTA": 230, "TNTT": 231, "TNTC": 232, "TNTG": 233, "TNTN": 234, "TNCA": 235, "TNCT": 236, "TNCC": 237, "TNCG": 238, "TNCN": 239, "TNGA": 240, "TNGT": 241, "TNGC": 242, "TNGG": 243, "TNGN": 244, "TNNA": 245, "TNNT": 246, "TNNC": 247, "TNNG": 248, "TNNN": 249, "CAAA": 250, "CAAT": 251, "CAAC": 252, "CAAG": 253, "CAAN": 254, "CATA": 255, "CATT": 256, "CATC": 257, "CATG": 258, "CATN": 259, "CACA": 260, "CACT": 261, "CACC": 262, "CACG": 263, "CACN": 264, "CAGA": 265, "CAGT": 266, "CAGC": 267, "CAGG": 268, "CAGN": 269, "CANA": 270, "CANT": 271, "CANC": 272, "CANG": 273, "CANN": 274, "CTAA": 275, "CTAT": 276, "CTAC": 277, "CTAG": 278, "CTAN": 279, "CTTA": 280, "CTTT": 281, "CTTC": 282, "CTTG": 283, "CTTN": 284, "CTCA": 285, "CTCT": 286, "CTCC": 287, "CTCG": 288, "CTCN": 289, "CTGA": 290, "CTGT": 291, "CTGC": 292, "CTGG": 293, "CTGN": 294, "CTNA": 295, "CTNT": 296, "CTNC": 297, "CTNG": 298, "CTNN": 299, "CCAA": 300, "CCAT": 301, "CCAC": 302, "CCAG": 303, "CCAN": 304, "CCTA": 305, "CCTT": 306, "CCTC": 307, "CCTG": 308, "CCTN": 309, "CCCA": 310, "CCCT": 311, "CCCC": 312, "CCCG": 313, "CCCN": 314, "CCGA": 315, "CCGT": 316, "CCGC": 317, "CCGG": 318, "CCGN": 319, "CCNA": 320, "CCNT": 321, "CCNC": 322, "CCNG": 323, "CCNN": 324, "CGAA": 325, "CGAT": 326, "CGAC": 327, "CGAG": 328, "CGAN": 329, "CGTA": 330, "CGTT": 331, "CGTC": 332, "CGTG": 333, "CGTN": 334, "CGCA": 335, "CGCT": 336, "CGCC": 337, "CGCG": 338, "CGCN": 339, "CGGA": 340, "CGGT": 341, "CGGC": 342, "CGGG": 343, "CGGN": 344, "CGNA": 345, "CGNT": 346, "CGNC": 347, "CGNG": 348, "CGNN": 349, "CNAA": 350, "CNAT": 351, "CNAC": 352, "CNAG": 353, "CNAN": 354, "CNTA": 355, "CNTT": 356, "CNTC": 357, "CNTG": 358, "CNTN": 359, "CNCA": 360, "CNCT": 361, "CNCC": 362, "CNCG": 363, "CNCN": 364, "CNGA": 365, "CNGT": 366, "CNGC": 367, "CNGG": 368, "CNGN": 369, "CNNA": 370, "CNNT": 371, "CNNC": 372, "CNNG": 373, "CNNN": 374, "GAAA": 375, "GAAT": 376, "GAAC": 377, "GAAG": 378, "GAAN": 379, "GATA": 380, "GATT": 381, "GATC": 382, "GATG": 383, "GATN": 384, "GACA": 385, "GACT": 386, "GACC": 387, "GACG": 388, "GACN": 389, "GAGA": 390, "GAGT": 391, "GAGC": 392, "GAGG": 393, "GAGN": 394, "GANA": 395, "GANT": 396, "GANC": 397, "GANG": 398, "GANN": 399, "GTAA": 400, "GTAT": 401, "GTAC": 402, "GTAG": 403, "GTAN": 404, "GTTA": 405, "GTTT": 406, "GTTC": 407, "GTTG": 408, "GTTN": 409, "GTCA": 410, "GTCT": 411, "GTCC": 412, "GTCG": 413, "GTCN": 414, "GTGA": 415, "GTGT": 416, "GTGC": 417, "GTGG": 418, "GTGN": 419, "GTNA": 420, "GTNT": 421, "GTNC": 422, "GTNG": 423, "GTNN": 424, "GCAA": 425, "GCAT": 426, "GCAC": 427, "GCAG": 428, "GCAN": 429, "GCTA": 430, "GCTT": 431, "GCTC": 432, "GCTG": 433, "GCTN": 434, "GCCA": 435, "GCCT": 436, "GCCC": 437, "GCCG": 438, "GCCN": 439, "GCGA": 440, "GCGT": 441, "GCGC": 442, "GCGG": 443, "GCGN": 444, "GCNA": 445, "GCNT": 446, "GCNC": 447, "GCNG": 448, "GCNN": 449, "GGAA": 450, "GGAT": 451, "GGAC": 452, "GGAG": 453, "GGAN": 454, "GGTA": 455, "GGTT": 456, "GGTC": 457, "GGTG": 458, "GGTN": 459, "GGCA": 460, "GGCT": 461, "GGCC": 462, "GGCG": 463, "GGCN": 464, "GGGA": 465, "GGGT": 466, "GGGC": 467, "GGGG": 468, "GGGN": 469, "GGNA": 470, "GGNT": 471, "GGNC": 472, "GGNG": 473, "GGNN": 474, "GNAA": 475, "GNAT": 476, "GNAC": 477, "GNAG": 478, "GNAN": 479, "GNTA": 480, "GNTT": 481, "GNTC": 482, "GNTG": 483, "GNTN": 484, "GNCA": 485, "GNCT": 486, "GNCC": 487, "GNCG": 488, "GNCN": 489, "GNGA": 490, "GNGT": 491, "GNGC": 492, "GNGG": 493, "GNGN": 494, "GNNA": 495, "GNNT": 496, "GNNC": 497, "GNNG": 498, "GNNN": 499, "NAAA": 500, "NAAT": 501, "NAAC": 502, "NAAG": 503, "NAAN": 504, "NATA": 505, "NATT": 506, "NATC": 507, "NATG": 508, "NATN": 509, "NACA": 510, "NACT": 511, "NACC": 512, "NACG": 513, "NACN": 514, "NAGA": 515, "NAGT": 516, "NAGC": 517, "NAGG": 518, "NAGN": 519, "NANA": 520, "NANT": 521, "NANC": 522, "NANG": 523, "NANN": 524, "NTAA": 525, "NTAT": 526, "NTAC": 527, "NTAG": 528, "NTAN": 529, "NTTA": 530, "NTTT": 531, "NTTC": 532, "NTTG": 533, "NTTN": 534, "NTCA": 535, "NTCT": 536, "NTCC": 537, "NTCG": 538, "NTCN": 539, "NTGA": 540, "NTGT": 541, "NTGC": 542, "NTGG": 543, "NTGN": 544, "NTNA": 545, "NTNT": 546, "NTNC": 547, "NTNG": 548, "NTNN": 549, "NCAA": 550, "NCAT": 551, "NCAC": 552, "NCAG": 553, "NCAN": 554, "NCTA": 555, "NCTT": 556, "NCTC": 557, "NCTG": 558, "NCTN": 559, "NCCA": 560, "NCCT": 561, "NCCC": 562, "NCCG": 563, "NCCN": 564, "NCGA": 565, "NCGT": 566, "NCGC": 567, "NCGG": 568, "NCGN": 569, "NCNA": 570, "NCNT": 571, "NCNC": 572, "NCNG": 573, "NCNN": 574, "NGAA": 575, "NGAT": 576, "NGAC": 577, "NGAG": 578, "NGAN": 579, "NGTA": 580, "NGTT": 581, "NGTC": 582, "NGTG": 583, "NGTN": 584, "NGCA": 585, "NGCT": 586, "NGCC": 587, "NGCG": 588, "NGCN": 589, "NGGA": 590, "NGGT": 591, "NGGC": 592, "NGGG": 593, "NGGN": 594, "NGNA": 595, "NGNT": 596, "NGNC": 597, "NGNG": 598, "NGNN": 599, "NNAA": 600, "NNAT": 601, "NNAC": 602, "NNAG": 603, "NNAN": 604, "NNTA": 605, "NNTT": 606, "NNTC": 607, "NNTG": 608, "NNTN": 609, "NNCA": 610, "NNCT": 611, "NNCC": 612, "NNCG": 613, "NNCN": 614, "NNGA": 615, "NNGT": 616, "NNGC": 617, "NNGG": 618, "NNGN": 619, "NNNA": 620, "NNNT": 621, "NNNC": 622, "NNNG": 623, "NNNN": 624, "[MASK]": 625, "[CLS]": 626, "[PAD]": 627, "[SEP]": 628, "[UNK]": 629 } } }