Charm_15 / tokenizer.json
{
  "version": "1.0",
  "model": "BPE",
  "vocab": {
    "<|begoftext|>": 0,
    "<|endoftext|>": 1,
    "<pad>": 2,
    "<unk>": 3,
    "the": 4,
    "a": 5,
    "and": 6,
    "to": 7,
    "of": 8,
    "in": 9,
    "I": 10,
    "is": 11,
    "it": 12,
    ".": 13,
    ",": 14,
    "th": 15,
    "an": 16,
    "ing": 17,
    "er": 18,
    "on": 19
  },
  "merges": [
    "t h",
    "th e",
    "a n",
    "an d",
    "i n",
    "o f",
    "t o",
    "i s",
    "i t",
    "in g",
    "e r",
    "o n"
  ],
  "special_tokens": {
    "pad_token": "<pad>",
    "bos_token": "<|begoftext|>",
    "eos_token": "<|endoftext|>",
    "unk_token": "<unk>"
  }
}
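
The config above uses a minimal flat layout rather than the nested schema that the Hugging Face tokenizers library serializes (which puts vocab and merges under a "model" object with a "type" field), so Tokenizer.from_file would not accept it directly. Below is a minimal sketch of how such a file can be applied by hand, assuming it is saved locally as tokenizer.json: split a word into characters, repeatedly apply the lowest-ranked merge rule, then map the surviving symbols to ids, falling back to <unk> since base characters are not part of this 20-entry vocabulary. The greedy merge loop, the bpe_encode name, and the file path are illustrative assumptions, not part of the file itself.

import json

# Load the config shown above; assumes it is saved locally as tokenizer.json.
with open("tokenizer.json") as f:
    cfg = json.load(f)

vocab = cfg["vocab"]
# Merge rules keyed by symbol pair; the list index is the priority
# (earlier-listed merges apply first).
ranks = {tuple(m.split()): i for i, m in enumerate(cfg["merges"])}
unk_id = vocab[cfg["special_tokens"]["unk_token"]]

def bpe_encode(word: str) -> list[int]:
    # Start from single characters and repeatedly apply the
    # lowest-ranked applicable merge until none applies.
    symbols = list(word)
    while len(symbols) > 1:
        candidates = [
            (ranks[pair], i)
            for i, pair in enumerate(zip(symbols, symbols[1:]))
            if pair in ranks
        ]
        if not candidates:
            break
        _, i = min(candidates)
        symbols[i:i + 2] = ["".join(symbols[i:i + 2])]
    # Base characters are not in the 20-entry vocab, so any
    # unmerged leftover maps to <unk>.
    return [vocab.get(s, unk_id) for s in symbols]

print(bpe_encode("the"))  # t+h -> th, th+e -> the  => [4]
print(bpe_encode("ing"))  # i+n -> in, in+g -> ing  => [17]
print(bpe_encode("cat"))  # no merge rule applies   => [3, 5, 3]

Tie-breaking with min over (rank, index) follows the usual BPE convention: the earliest-learned merge wins, and ties resolve left to right.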