|
{ |
|
"version": "1.0", |
|
"truncation": null, |
|
"padding": null, |
|
"added_tokens": [ |
|
{ |
|
"id": 625, |
|
"content": "[MASK]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 626, |
|
"content": "[CLS]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 627, |
|
"content": "[PAD]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 628, |
|
"content": "[SEP]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 629, |
|
"content": "[UNK]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
} |
|
], |
|
"normalizer": { |
|
"type": "BertNormalizer", |
|
"clean_text": true, |
|
"handle_chinese_chars": true, |
|
"strip_accents": null, |
|
"lowercase": false |
|
}, |
|
"pre_tokenizer": { |
|
"type": "BertPreTokenizer" |
|
}, |
|
"post_processor": { |
|
"type": "TemplateProcessing", |
|
"single": [ |
|
{ |
|
"SpecialToken": { |
|
"id": "[CLS]", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "A", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "[SEP]", |
|
"type_id": 0 |
|
} |
|
} |
|
], |
|
"pair": [ |
|
{ |
|
"SpecialToken": { |
|
"id": "[CLS]", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "A", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "[SEP]", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "B", |
|
"type_id": 1 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "[SEP]", |
|
"type_id": 1 |
|
} |
|
} |
|
], |
|
"special_tokens": { |
|
"[CLS]": { |
|
"id": "[CLS]", |
|
"ids": [ |
|
626 |
|
], |
|
"tokens": [ |
|
"[CLS]" |
|
] |
|
}, |
|
"[SEP]": { |
|
"id": "[SEP]", |
|
"ids": [ |
|
628 |
|
], |
|
"tokens": [ |
|
"[SEP]" |
|
] |
|
} |
|
} |
|
}, |
|
"decoder": { |
|
"type": "WordPiece", |
|
"prefix": "##", |
|
"cleanup": true |
|
}, |
|
"model": { |
|
"type": "WordPiece", |
|
"unk_token": "[UNK]", |
|
"continuing_subword_prefix": "##", |
|
"max_input_chars_per_word": 100, |
|
"vocab": { |
|
"AAAA": 0, |
|
"AAAT": 1, |
|
"AAAC": 2, |
|
"AAAG": 3, |
|
"AAAN": 4, |
|
"AATA": 5, |
|
"AATT": 6, |
|
"AATC": 7, |
|
"AATG": 8, |
|
"AATN": 9, |
|
"AACA": 10, |
|
"AACT": 11, |
|
"AACC": 12, |
|
"AACG": 13, |
|
"AACN": 14, |
|
"AAGA": 15, |
|
"AAGT": 16, |
|
"AAGC": 17, |
|
"AAGG": 18, |
|
"AAGN": 19, |
|
"AANA": 20, |
|
"AANT": 21, |
|
"AANC": 22, |
|
"AANG": 23, |
|
"AANN": 24, |
|
"ATAA": 25, |
|
"ATAT": 26, |
|
"ATAC": 27, |
|
"ATAG": 28, |
|
"ATAN": 29, |
|
"ATTA": 30, |
|
"ATTT": 31, |
|
"ATTC": 32, |
|
"ATTG": 33, |
|
"ATTN": 34, |
|
"ATCA": 35, |
|
"ATCT": 36, |
|
"ATCC": 37, |
|
"ATCG": 38, |
|
"ATCN": 39, |
|
"ATGA": 40, |
|
"ATGT": 41, |
|
"ATGC": 42, |
|
"ATGG": 43, |
|
"ATGN": 44, |
|
"ATNA": 45, |
|
"ATNT": 46, |
|
"ATNC": 47, |
|
"ATNG": 48, |
|
"ATNN": 49, |
|
"ACAA": 50, |
|
"ACAT": 51, |
|
"ACAC": 52, |
|
"ACAG": 53, |
|
"ACAN": 54, |
|
"ACTA": 55, |
|
"ACTT": 56, |
|
"ACTC": 57, |
|
"ACTG": 58, |
|
"ACTN": 59, |
|
"ACCA": 60, |
|
"ACCT": 61, |
|
"ACCC": 62, |
|
"ACCG": 63, |
|
"ACCN": 64, |
|
"ACGA": 65, |
|
"ACGT": 66, |
|
"ACGC": 67, |
|
"ACGG": 68, |
|
"ACGN": 69, |
|
"ACNA": 70, |
|
"ACNT": 71, |
|
"ACNC": 72, |
|
"ACNG": 73, |
|
"ACNN": 74, |
|
"AGAA": 75, |
|
"AGAT": 76, |
|
"AGAC": 77, |
|
"AGAG": 78, |
|
"AGAN": 79, |
|
"AGTA": 80, |
|
"AGTT": 81, |
|
"AGTC": 82, |
|
"AGTG": 83, |
|
"AGTN": 84, |
|
"AGCA": 85, |
|
"AGCT": 86, |
|
"AGCC": 87, |
|
"AGCG": 88, |
|
"AGCN": 89, |
|
"AGGA": 90, |
|
"AGGT": 91, |
|
"AGGC": 92, |
|
"AGGG": 93, |
|
"AGGN": 94, |
|
"AGNA": 95, |
|
"AGNT": 96, |
|
"AGNC": 97, |
|
"AGNG": 98, |
|
"AGNN": 99, |
|
"ANAA": 100, |
|
"ANAT": 101, |
|
"ANAC": 102, |
|
"ANAG": 103, |
|
"ANAN": 104, |
|
"ANTA": 105, |
|
"ANTT": 106, |
|
"ANTC": 107, |
|
"ANTG": 108, |
|
"ANTN": 109, |
|
"ANCA": 110, |
|
"ANCT": 111, |
|
"ANCC": 112, |
|
"ANCG": 113, |
|
"ANCN": 114, |
|
"ANGA": 115, |
|
"ANGT": 116, |
|
"ANGC": 117, |
|
"ANGG": 118, |
|
"ANGN": 119, |
|
"ANNA": 120, |
|
"ANNT": 121, |
|
"ANNC": 122, |
|
"ANNG": 123, |
|
"ANNN": 124, |
|
"TAAA": 125, |
|
"TAAT": 126, |
|
"TAAC": 127, |
|
"TAAG": 128, |
|
"TAAN": 129, |
|
"TATA": 130, |
|
"TATT": 131, |
|
"TATC": 132, |
|
"TATG": 133, |
|
"TATN": 134, |
|
"TACA": 135, |
|
"TACT": 136, |
|
"TACC": 137, |
|
"TACG": 138, |
|
"TACN": 139, |
|
"TAGA": 140, |
|
"TAGT": 141, |
|
"TAGC": 142, |
|
"TAGG": 143, |
|
"TAGN": 144, |
|
"TANA": 145, |
|
"TANT": 146, |
|
"TANC": 147, |
|
"TANG": 148, |
|
"TANN": 149, |
|
"TTAA": 150, |
|
"TTAT": 151, |
|
"TTAC": 152, |
|
"TTAG": 153, |
|
"TTAN": 154, |
|
"TTTA": 155, |
|
"TTTT": 156, |
|
"TTTC": 157, |
|
"TTTG": 158, |
|
"TTTN": 159, |
|
"TTCA": 160, |
|
"TTCT": 161, |
|
"TTCC": 162, |
|
"TTCG": 163, |
|
"TTCN": 164, |
|
"TTGA": 165, |
|
"TTGT": 166, |
|
"TTGC": 167, |
|
"TTGG": 168, |
|
"TTGN": 169, |
|
"TTNA": 170, |
|
"TTNT": 171, |
|
"TTNC": 172, |
|
"TTNG": 173, |
|
"TTNN": 174, |
|
"TCAA": 175, |
|
"TCAT": 176, |
|
"TCAC": 177, |
|
"TCAG": 178, |
|
"TCAN": 179, |
|
"TCTA": 180, |
|
"TCTT": 181, |
|
"TCTC": 182, |
|
"TCTG": 183, |
|
"TCTN": 184, |
|
"TCCA": 185, |
|
"TCCT": 186, |
|
"TCCC": 187, |
|
"TCCG": 188, |
|
"TCCN": 189, |
|
"TCGA": 190, |
|
"TCGT": 191, |
|
"TCGC": 192, |
|
"TCGG": 193, |
|
"TCGN": 194, |
|
"TCNA": 195, |
|
"TCNT": 196, |
|
"TCNC": 197, |
|
"TCNG": 198, |
|
"TCNN": 199, |
|
"TGAA": 200, |
|
"TGAT": 201, |
|
"TGAC": 202, |
|
"TGAG": 203, |
|
"TGAN": 204, |
|
"TGTA": 205, |
|
"TGTT": 206, |
|
"TGTC": 207, |
|
"TGTG": 208, |
|
"TGTN": 209, |
|
"TGCA": 210, |
|
"TGCT": 211, |
|
"TGCC": 212, |
|
"TGCG": 213, |
|
"TGCN": 214, |
|
"TGGA": 215, |
|
"TGGT": 216, |
|
"TGGC": 217, |
|
"TGGG": 218, |
|
"TGGN": 219, |
|
"TGNA": 220, |
|
"TGNT": 221, |
|
"TGNC": 222, |
|
"TGNG": 223, |
|
"TGNN": 224, |
|
"TNAA": 225, |
|
"TNAT": 226, |
|
"TNAC": 227, |
|
"TNAG": 228, |
|
"TNAN": 229, |
|
"TNTA": 230, |
|
"TNTT": 231, |
|
"TNTC": 232, |
|
"TNTG": 233, |
|
"TNTN": 234, |
|
"TNCA": 235, |
|
"TNCT": 236, |
|
"TNCC": 237, |
|
"TNCG": 238, |
|
"TNCN": 239, |
|
"TNGA": 240, |
|
"TNGT": 241, |
|
"TNGC": 242, |
|
"TNGG": 243, |
|
"TNGN": 244, |
|
"TNNA": 245, |
|
"TNNT": 246, |
|
"TNNC": 247, |
|
"TNNG": 248, |
|
"TNNN": 249, |
|
"CAAA": 250, |
|
"CAAT": 251, |
|
"CAAC": 252, |
|
"CAAG": 253, |
|
"CAAN": 254, |
|
"CATA": 255, |
|
"CATT": 256, |
|
"CATC": 257, |
|
"CATG": 258, |
|
"CATN": 259, |
|
"CACA": 260, |
|
"CACT": 261, |
|
"CACC": 262, |
|
"CACG": 263, |
|
"CACN": 264, |
|
"CAGA": 265, |
|
"CAGT": 266, |
|
"CAGC": 267, |
|
"CAGG": 268, |
|
"CAGN": 269, |
|
"CANA": 270, |
|
"CANT": 271, |
|
"CANC": 272, |
|
"CANG": 273, |
|
"CANN": 274, |
|
"CTAA": 275, |
|
"CTAT": 276, |
|
"CTAC": 277, |
|
"CTAG": 278, |
|
"CTAN": 279, |
|
"CTTA": 280, |
|
"CTTT": 281, |
|
"CTTC": 282, |
|
"CTTG": 283, |
|
"CTTN": 284, |
|
"CTCA": 285, |
|
"CTCT": 286, |
|
"CTCC": 287, |
|
"CTCG": 288, |
|
"CTCN": 289, |
|
"CTGA": 290, |
|
"CTGT": 291, |
|
"CTGC": 292, |
|
"CTGG": 293, |
|
"CTGN": 294, |
|
"CTNA": 295, |
|
"CTNT": 296, |
|
"CTNC": 297, |
|
"CTNG": 298, |
|
"CTNN": 299, |
|
"CCAA": 300, |
|
"CCAT": 301, |
|
"CCAC": 302, |
|
"CCAG": 303, |
|
"CCAN": 304, |
|
"CCTA": 305, |
|
"CCTT": 306, |
|
"CCTC": 307, |
|
"CCTG": 308, |
|
"CCTN": 309, |
|
"CCCA": 310, |
|
"CCCT": 311, |
|
"CCCC": 312, |
|
"CCCG": 313, |
|
"CCCN": 314, |
|
"CCGA": 315, |
|
"CCGT": 316, |
|
"CCGC": 317, |
|
"CCGG": 318, |
|
"CCGN": 319, |
|
"CCNA": 320, |
|
"CCNT": 321, |
|
"CCNC": 322, |
|
"CCNG": 323, |
|
"CCNN": 324, |
|
"CGAA": 325, |
|
"CGAT": 326, |
|
"CGAC": 327, |
|
"CGAG": 328, |
|
"CGAN": 329, |
|
"CGTA": 330, |
|
"CGTT": 331, |
|
"CGTC": 332, |
|
"CGTG": 333, |
|
"CGTN": 334, |
|
"CGCA": 335, |
|
"CGCT": 336, |
|
"CGCC": 337, |
|
"CGCG": 338, |
|
"CGCN": 339, |
|
"CGGA": 340, |
|
"CGGT": 341, |
|
"CGGC": 342, |
|
"CGGG": 343, |
|
"CGGN": 344, |
|
"CGNA": 345, |
|
"CGNT": 346, |
|
"CGNC": 347, |
|
"CGNG": 348, |
|
"CGNN": 349, |
|
"CNAA": 350, |
|
"CNAT": 351, |
|
"CNAC": 352, |
|
"CNAG": 353, |
|
"CNAN": 354, |
|
"CNTA": 355, |
|
"CNTT": 356, |
|
"CNTC": 357, |
|
"CNTG": 358, |
|
"CNTN": 359, |
|
"CNCA": 360, |
|
"CNCT": 361, |
|
"CNCC": 362, |
|
"CNCG": 363, |
|
"CNCN": 364, |
|
"CNGA": 365, |
|
"CNGT": 366, |
|
"CNGC": 367, |
|
"CNGG": 368, |
|
"CNGN": 369, |
|
"CNNA": 370, |
|
"CNNT": 371, |
|
"CNNC": 372, |
|
"CNNG": 373, |
|
"CNNN": 374, |
|
"GAAA": 375, |
|
"GAAT": 376, |
|
"GAAC": 377, |
|
"GAAG": 378, |
|
"GAAN": 379, |
|
"GATA": 380, |
|
"GATT": 381, |
|
"GATC": 382, |
|
"GATG": 383, |
|
"GATN": 384, |
|
"GACA": 385, |
|
"GACT": 386, |
|
"GACC": 387, |
|
"GACG": 388, |
|
"GACN": 389, |
|
"GAGA": 390, |
|
"GAGT": 391, |
|
"GAGC": 392, |
|
"GAGG": 393, |
|
"GAGN": 394, |
|
"GANA": 395, |
|
"GANT": 396, |
|
"GANC": 397, |
|
"GANG": 398, |
|
"GANN": 399, |
|
"GTAA": 400, |
|
"GTAT": 401, |
|
"GTAC": 402, |
|
"GTAG": 403, |
|
"GTAN": 404, |
|
"GTTA": 405, |
|
"GTTT": 406, |
|
"GTTC": 407, |
|
"GTTG": 408, |
|
"GTTN": 409, |
|
"GTCA": 410, |
|
"GTCT": 411, |
|
"GTCC": 412, |
|
"GTCG": 413, |
|
"GTCN": 414, |
|
"GTGA": 415, |
|
"GTGT": 416, |
|
"GTGC": 417, |
|
"GTGG": 418, |
|
"GTGN": 419, |
|
"GTNA": 420, |
|
"GTNT": 421, |
|
"GTNC": 422, |
|
"GTNG": 423, |
|
"GTNN": 424, |
|
"GCAA": 425, |
|
"GCAT": 426, |
|
"GCAC": 427, |
|
"GCAG": 428, |
|
"GCAN": 429, |
|
"GCTA": 430, |
|
"GCTT": 431, |
|
"GCTC": 432, |
|
"GCTG": 433, |
|
"GCTN": 434, |
|
"GCCA": 435, |
|
"GCCT": 436, |
|
"GCCC": 437, |
|
"GCCG": 438, |
|
"GCCN": 439, |
|
"GCGA": 440, |
|
"GCGT": 441, |
|
"GCGC": 442, |
|
"GCGG": 443, |
|
"GCGN": 444, |
|
"GCNA": 445, |
|
"GCNT": 446, |
|
"GCNC": 447, |
|
"GCNG": 448, |
|
"GCNN": 449, |
|
"GGAA": 450, |
|
"GGAT": 451, |
|
"GGAC": 452, |
|
"GGAG": 453, |
|
"GGAN": 454, |
|
"GGTA": 455, |
|
"GGTT": 456, |
|
"GGTC": 457, |
|
"GGTG": 458, |
|
"GGTN": 459, |
|
"GGCA": 460, |
|
"GGCT": 461, |
|
"GGCC": 462, |
|
"GGCG": 463, |
|
"GGCN": 464, |
|
"GGGA": 465, |
|
"GGGT": 466, |
|
"GGGC": 467, |
|
"GGGG": 468, |
|
"GGGN": 469, |
|
"GGNA": 470, |
|
"GGNT": 471, |
|
"GGNC": 472, |
|
"GGNG": 473, |
|
"GGNN": 474, |
|
"GNAA": 475, |
|
"GNAT": 476, |
|
"GNAC": 477, |
|
"GNAG": 478, |
|
"GNAN": 479, |
|
"GNTA": 480, |
|
"GNTT": 481, |
|
"GNTC": 482, |
|
"GNTG": 483, |
|
"GNTN": 484, |
|
"GNCA": 485, |
|
"GNCT": 486, |
|
"GNCC": 487, |
|
"GNCG": 488, |
|
"GNCN": 489, |
|
"GNGA": 490, |
|
"GNGT": 491, |
|
"GNGC": 492, |
|
"GNGG": 493, |
|
"GNGN": 494, |
|
"GNNA": 495, |
|
"GNNT": 496, |
|
"GNNC": 497, |
|
"GNNG": 498, |
|
"GNNN": 499, |
|
"NAAA": 500, |
|
"NAAT": 501, |
|
"NAAC": 502, |
|
"NAAG": 503, |
|
"NAAN": 504, |
|
"NATA": 505, |
|
"NATT": 506, |
|
"NATC": 507, |
|
"NATG": 508, |
|
"NATN": 509, |
|
"NACA": 510, |
|
"NACT": 511, |
|
"NACC": 512, |
|
"NACG": 513, |
|
"NACN": 514, |
|
"NAGA": 515, |
|
"NAGT": 516, |
|
"NAGC": 517, |
|
"NAGG": 518, |
|
"NAGN": 519, |
|
"NANA": 520, |
|
"NANT": 521, |
|
"NANC": 522, |
|
"NANG": 523, |
|
"NANN": 524, |
|
"NTAA": 525, |
|
"NTAT": 526, |
|
"NTAC": 527, |
|
"NTAG": 528, |
|
"NTAN": 529, |
|
"NTTA": 530, |
|
"NTTT": 531, |
|
"NTTC": 532, |
|
"NTTG": 533, |
|
"NTTN": 534, |
|
"NTCA": 535, |
|
"NTCT": 536, |
|
"NTCC": 537, |
|
"NTCG": 538, |
|
"NTCN": 539, |
|
"NTGA": 540, |
|
"NTGT": 541, |
|
"NTGC": 542, |
|
"NTGG": 543, |
|
"NTGN": 544, |
|
"NTNA": 545, |
|
"NTNT": 546, |
|
"NTNC": 547, |
|
"NTNG": 548, |
|
"NTNN": 549, |
|
"NCAA": 550, |
|
"NCAT": 551, |
|
"NCAC": 552, |
|
"NCAG": 553, |
|
"NCAN": 554, |
|
"NCTA": 555, |
|
"NCTT": 556, |
|
"NCTC": 557, |
|
"NCTG": 558, |
|
"NCTN": 559, |
|
"NCCA": 560, |
|
"NCCT": 561, |
|
"NCCC": 562, |
|
"NCCG": 563, |
|
"NCCN": 564, |
|
"NCGA": 565, |
|
"NCGT": 566, |
|
"NCGC": 567, |
|
"NCGG": 568, |
|
"NCGN": 569, |
|
"NCNA": 570, |
|
"NCNT": 571, |
|
"NCNC": 572, |
|
"NCNG": 573, |
|
"NCNN": 574, |
|
"NGAA": 575, |
|
"NGAT": 576, |
|
"NGAC": 577, |
|
"NGAG": 578, |
|
"NGAN": 579, |
|
"NGTA": 580, |
|
"NGTT": 581, |
|
"NGTC": 582, |
|
"NGTG": 583, |
|
"NGTN": 584, |
|
"NGCA": 585, |
|
"NGCT": 586, |
|
"NGCC": 587, |
|
"NGCG": 588, |
|
"NGCN": 589, |
|
"NGGA": 590, |
|
"NGGT": 591, |
|
"NGGC": 592, |
|
"NGGG": 593, |
|
"NGGN": 594, |
|
"NGNA": 595, |
|
"NGNT": 596, |
|
"NGNC": 597, |
|
"NGNG": 598, |
|
"NGNN": 599, |
|
"NNAA": 600, |
|
"NNAT": 601, |
|
"NNAC": 602, |
|
"NNAG": 603, |
|
"NNAN": 604, |
|
"NNTA": 605, |
|
"NNTT": 606, |
|
"NNTC": 607, |
|
"NNTG": 608, |
|
"NNTN": 609, |
|
"NNCA": 610, |
|
"NNCT": 611, |
|
"NNCC": 612, |
|
"NNCG": 613, |
|
"NNCN": 614, |
|
"NNGA": 615, |
|
"NNGT": 616, |
|
"NNGC": 617, |
|
"NNGG": 618, |
|
"NNGN": 619, |
|
"NNNA": 620, |
|
"NNNT": 621, |
|
"NNNC": 622, |
|
"NNNG": 623, |
|
"NNNN": 624, |
|
"[MASK]": 625, |
|
"[CLS]": 626, |
|
"[PAD]": 627, |
|
"[SEP]": 628, |
|
"[UNK]": 629 |
|
} |
|
} |
|
} |