Upload tokenizer
Browse files
- chat_template.jinja +4 -0
- tokenizer.json +16 -6
- tokenizer_config.json +1 -2
chat_template.jinja
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{% for message in messages %}{{'<|im_start|>' + message['role'] + '
|
2 |
+
' + message['content'] + '<|im_end|>' + '
|
3 |
+
'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
|
4 |
+
' }}{% endif %}
|
tokenizer.json
CHANGED
@@ -58,13 +58,23 @@
|
|
58 |
"special": true
|
59 |
}
|
60 |
],
|
61 |
-
"normalizer":
|
62 |
-
|
63 |
-
"
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
},
|
|
|
68 |
"post_processor": {
|
69 |
"type": "TemplateProcessing",
|
70 |
"single": [
|
|
|
58 |
"special": true
|
59 |
}
|
60 |
],
|
61 |
+
"normalizer": {
|
62 |
+
"type": "Sequence",
|
63 |
+
"normalizers": [
|
64 |
+
{
|
65 |
+
"type": "Prepend",
|
66 |
+
"prepend": "▁"
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"type": "Replace",
|
70 |
+
"pattern": {
|
71 |
+
"String": " "
|
72 |
+
},
|
73 |
+
"content": "▁"
|
74 |
+
}
|
75 |
+
]
|
76 |
},
|
77 |
+
"pre_tokenizer": null,
|
78 |
"post_processor": {
|
79 |
"type": "TemplateProcessing",
|
80 |
"single": [
|
tokenizer_config.json
CHANGED
@@ -57,11 +57,10 @@
|
|
57 |
"<|im_end|>"
|
58 |
],
|
59 |
"bos_token": "<|im_start|>",
|
60 |
-
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
61 |
"clean_up_tokenization_spaces": false,
|
62 |
"eos_token": "<|im_end|>",
|
63 |
"extra_special_tokens": {},
|
64 |
-
"legacy": false,
|
65 |
"model_max_length": 1000000000000000019884624838656,
|
66 |
"pad_token": "<|im_end|>",
|
67 |
"sp_model_kwargs": {},
|
|
|
57 |
"<|im_end|>"
|
58 |
],
|
59 |
"bos_token": "<|im_start|>",
|
|
|
60 |
"clean_up_tokenization_spaces": false,
|
61 |
"eos_token": "<|im_end|>",
|
62 |
"extra_special_tokens": {},
|
63 |
+
"legacy": true,
|
64 |
"model_max_length": 1000000000000000019884624838656,
|
65 |
"pad_token": "<|im_end|>",
|
66 |
"sp_model_kwargs": {},
|