{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": null, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": "[UNK]", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "vocab": { "[UNK]": 0, "[MASK]": 1, "0": 2, "1": 3, "2": 4, "3": 5, "4": 6, "5": 7, "6": 8, "7": 9, "8": 10, "9": 11, "a": 12, "b": 13, "c": 14, "d": 15, "e": 16, "f": 17, "g": 18, "h": 19, "i": 20, "j": 21, "k": 22, "l": 23, "m": 24, "n": 25, "o": 26, "p": 27, "q": 28, "r": 29, "s": 30, "t": 31, "u": 32, "v": 33, "w": 34, "x": 35, "y": 36, "z": 37, "A": 38, "B": 39, "C": 40, "D": 41, "E": 42, "F": 43, "G": 44, "H": 45, "I": 46, "J": 47, "K": 48, "L": 49, "M": 50, "N": 51, "O": 52, "P": 53, "Q": 54, "R": 55, "S": 56, "T": 57, "U": 58, "V": 59, "W": 60, "X": 61, "Y": 62, "Z": 63, "!": 64, "\"": 65, "#": 66, "$": 67, "%": 68, "&": 69, "'": 70, "(": 71, ")": 72, "*": 73, "+": 74, ",": 75, "-": 76, ".": 77, "/": 78, ":": 79, ";": 80, "<": 81, "=": 82, ">": 83, "?": 84, "@": 85, "[": 86, "\\": 87, "]": 88, "^": 89, "_": 90, "`": 91, "{": 92, "|": 93, "}": 94, "~": 95, " ": 96 }, "merges": [] } }