Llama3-2-3B-IT-Byte / tokenizer.json
benjamin's picture
update
dc7e313
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [],
"normalizer": null,
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": false
},
"post_processor": {
"type": "Sequence",
"processors": [
{
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": false,
"use_regex": true
},
{
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<|begin_of_text|>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "<|begin_of_text|>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "<|begin_of_text|>",
"type_id": 1
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"<|begin_of_text|>": {
"id": "<|begin_of_text|>",
"ids": [
256
],
"tokens": [
"<|begin_of_text|>"
]
}
}
}
]
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "Unigram",
"unk_id": null,
"vocab": [
[
"Ā",
0.0
],
[
"ā",
0.0
],
[
"Ă",
0.0
],
[
"ă",
0.0
],
[
"Ą",
0.0
],
[
"ą",
0.0
],
[
"Ć",
0.0
],
[
"ć",
0.0
],
[
"Ĉ",
0.0
],
[
"ĉ",
0.0
],
[
"Ċ",
0.0
],
[
"ċ",
0.0
],
[
"Č",
0.0
],
[
"č",
0.0
],
[
"Ď",
0.0
],
[
"ď",
0.0
],
[
"Đ",
0.0
],
[
"đ",
0.0
],
[
"Ē",
0.0
],
[
"ē",
0.0
],
[
"Ĕ",
0.0
],
[
"ĕ",
0.0
],
[
"Ė",
0.0
],
[
"ė",
0.0
],
[
"Ę",
0.0
],
[
"ę",
0.0
],
[
"Ě",
0.0
],
[
"ě",
0.0
],
[
"Ĝ",
0.0
],
[
"ĝ",
0.0
],
[
"Ğ",
0.0
],
[
"ğ",
0.0
],
[
"Ġ",
0.0
],
[
"!",
0.0
],
[
"\"",
0.0
],
[
"#",
0.0
],
[
"$",
0.0
],
[
"%",
0.0
],
[
"&",
0.0
],
[
"'",
0.0
],
[
"(",
0.0
],
[
")",
0.0
],
[
"*",
0.0
],
[
"+",
0.0
],
[
",",
0.0
],
[
"-",
0.0
],
[
".",
0.0
],
[
"/",
0.0
],
[
"0",
0.0
],
[
"1",
0.0
],
[
"2",
0.0
],
[
"3",
0.0
],
[
"4",
0.0
],
[
"5",
0.0
],
[
"6",
0.0
],
[
"7",
0.0
],
[
"8",
0.0
],
[
"9",
0.0
],
[
":",
0.0
],
[
";",
0.0
],
[
"<",
0.0
],
[
"=",
0.0
],
[
">",
0.0
],
[
"?",
0.0
],
[
"@",
0.0
],
[
"A",
0.0
],
[
"B",
0.0
],
[
"C",
0.0
],
[
"D",
0.0
],
[
"E",
0.0
],
[
"F",
0.0
],
[
"G",
0.0
],
[
"H",
0.0
],
[
"I",
0.0
],
[
"J",
0.0
],
[
"K",
0.0
],
[
"L",
0.0
],
[
"M",
0.0
],
[
"N",
0.0
],
[
"O",
0.0
],
[
"P",
0.0
],
[
"Q",
0.0
],
[
"R",
0.0
],
[
"S",
0.0
],
[
"T",
0.0
],
[
"U",
0.0
],
[
"V",
0.0
],
[
"W",
0.0
],
[
"X",
0.0
],
[
"Y",
0.0
],
[
"Z",
0.0
],
[
"[",
0.0
],
[
"\\",
0.0
],
[
"]",
0.0
],
[
"^",
0.0
],
[
"_",
0.0
],
[
"`",
0.0
],
[
"a",
0.0
],
[
"b",
0.0
],
[
"c",
0.0
],
[
"d",
0.0
],
[
"e",
0.0
],
[
"f",
0.0
],
[
"g",
0.0
],
[
"h",
0.0
],
[
"i",
0.0
],
[
"j",
0.0
],
[
"k",
0.0
],
[
"l",
0.0
],
[
"m",
0.0
],
[
"n",
0.0
],
[
"o",
0.0
],
[
"p",
0.0
],
[
"q",
0.0
],
[
"r",
0.0
],
[
"s",
0.0
],
[
"t",
0.0
],
[
"u",
0.0
],
[
"v",
0.0
],
[
"w",
0.0
],
[
"x",
0.0
],
[
"y",
0.0
],
[
"z",
0.0
],
[
"{",
0.0
],
[
"|",
0.0
],
[
"}",
0.0
],
[
"~",
0.0
],
[
"ġ",
0.0
],
[
"Ģ",
0.0
],
[
"ģ",
0.0
],
[
"Ĥ",
0.0
],
[
"ĥ",
0.0
],
[
"Ħ",
0.0
],
[
"ħ",
0.0
],
[
"Ĩ",
0.0
],
[
"ĩ",
0.0
],
[
"Ī",
0.0
],
[
"ī",
0.0
],
[
"Ĭ",
0.0
],
[
"ĭ",
0.0
],
[
"Į",
0.0
],
[
"į",
0.0
],
[
"İ",
0.0
],
[
"ı",
0.0
],
[
"\u0132",
0.0
],
[
"\u0133",
0.0
],
[
"Ĵ",
0.0
],
[
"ĵ",
0.0
],
[
"Ķ",
0.0
],
[
"ķ",
0.0
],
[
"ĸ",
0.0
],
[
"Ĺ",
0.0
],
[
"ĺ",
0.0
],
[
"Ļ",
0.0
],
[
"ļ",
0.0
],
[
"Ľ",
0.0
],
[
"ľ",
0.0
],
[
"Ŀ",
0.0
],
[
"ŀ",
0.0
],
[
"Ł",
0.0
],
[
"ł",
0.0
],
[
"¡",
0.0
],
[
"¢",
0.0
],
[
"£",
0.0
],
[
"¤",
0.0
],
[
"¥",
0.0
],
[
"¦",
0.0
],
[
"§",
0.0
],
[
"¨",
0.0
],
[
"©",
0.0
],
[
"ª",
0.0
],
[
"«",
0.0
],
[
"¬",
0.0
],
[
"Ń",
0.0
],
[
"®",
0.0
],
[
"¯",
0.0
],
[
"°",
0.0
],
[
"±",
0.0
],
[
"²",
0.0
],
[
"³",
0.0
],
[
"´",
0.0
],
[
"µ",
0.0
],
[
"¶",
0.0
],
[
"·",
0.0
],
[
"¸",
0.0
],
[
"¹",
0.0
],
[
"º",
0.0
],
[
"»",
0.0
],
[
"¼",
0.0
],
[
"½",
0.0
],
[
"¾",
0.0
],
[
"¿",
0.0
],
[
"À",
0.0
],
[
"Á",
0.0
],
[
"Â",
0.0
],
[
"Ã",
0.0
],
[
"Ä",
0.0
],
[
"Å",
0.0
],
[
"Æ",
0.0
],
[
"Ç",
0.0
],
[
"È",
0.0
],
[
"É",
0.0
],
[
"Ê",
0.0
],
[
"Ë",
0.0
],
[
"Ì",
0.0
],
[
"Í",
0.0
],
[
"Î",
0.0
],
[
"Ï",
0.0
],
[
"Ð",
0.0
],
[
"Ñ",
0.0
],
[
"Ò",
0.0
],
[
"Ó",
0.0
],
[
"Ô",
0.0
],
[
"Õ",
0.0
],
[
"Ö",
0.0
],
[
"×",
0.0
],
[
"Ø",
0.0
],
[
"Ù",
0.0
],
[
"Ú",
0.0
],
[
"Û",
0.0
],
[
"Ü",
0.0
],
[
"Ý",
0.0
],
[
"Þ",
0.0
],
[
"ß",
0.0
],
[
"à",
0.0
],
[
"á",
0.0
],
[
"â",
0.0
],
[
"ã",
0.0
],
[
"ä",
0.0
],
[
"å",
0.0
],
[
"æ",
0.0
],
[
"ç",
0.0
],
[
"è",
0.0
],
[
"é",
0.0
],
[
"ê",
0.0
],
[
"ë",
0.0
],
[
"ì",
0.0
],
[
"í",
0.0
],
[
"î",
0.0
],
[
"ï",
0.0
],
[
"ð",
0.0
],
[
"ñ",
0.0
],
[
"ò",
0.0
],
[
"ó",
0.0
],
[
"ô",
0.0
],
[
"õ",
0.0
],
[
"ö",
0.0
],
[
"÷",
0.0
],
[
"ø",
0.0
],
[
"ù",
0.0
],
[
"ú",
0.0
],
[
"û",
0.0
],
[
"ü",
0.0
],
[
"ý",
0.0
],
[
"þ",
0.0
],
[
"ÿ",
0.0
],
[
"<|begin_of_text|>",
0.0
],
[
"<|end_header_id|>",
0.0
],
[
"<|end_of_text|>",
0.0
],
[
"<|eot_id|>",
0.0
],
[
"<|start_header_id|>",
0.0
],
[
"assistant",
0.0
],
[
"system",
0.0
],
[
"user",
0.0
],
[
"ĊĊ",
0.0
]
],
"byte_fallback": false
}
}