Upload tokenizer
Browse files
- tokenizer.json +6 -16
- tokenizer_config.json +1 -1
tokenizer.json
CHANGED
@@ -58,23 +58,13 @@
|
|
58 |
"special": true
|
59 |
}
|
60 |
],
|
61 |
-
"normalizer": {
|
62 |
-
"type": "Sequence",
|
63 |
-
"normalizers": [
|
64 |
-
{
|
65 |
-
"type": "Prepend",
|
66 |
-
"prepend": "▁"
|
67 |
-
},
|
68 |
-
{
|
69 |
-
"type": "Replace",
|
70 |
-
"pattern": {
|
71 |
-
"String": " "
|
72 |
-
},
|
73 |
-
"content": "▁"
|
74 |
-
}
|
75 |
-
]
|
76 |
},
|
77 |
-
"pre_tokenizer": null,
|
78 |
"post_processor": {
|
79 |
"type": "TemplateProcessing",
|
80 |
"single": [
|
|
|
58 |
"special": true
|
59 |
}
|
60 |
],
|
61 |
+
"normalizer": null,
|
62 |
+
"pre_tokenizer": {
|
63 |
+
"type": "Metaspace",
|
64 |
+
"replacement": "▁",
|
65 |
+
"prepend_scheme": "first",
|
66 |
+
"split": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
},
|
|
|
68 |
"post_processor": {
|
69 |
"type": "TemplateProcessing",
|
70 |
"single": [
|
tokenizer_config.json
CHANGED
@@ -60,7 +60,7 @@
|
|
60 |
"clean_up_tokenization_spaces": false,
|
61 |
"eos_token": "<|im_end|>",
|
62 |
"extra_special_tokens": {},
|
63 |
-
"legacy": true,
|
64 |
"model_max_length": 1000000000000000019884624838656,
|
65 |
"pad_token": "<|im_end|>",
|
66 |
"sp_model_kwargs": {},
|
|
|
60 |
"clean_up_tokenization_spaces": false,
|
61 |
"eos_token": "<|im_end|>",
|
62 |
"extra_special_tokens": {},
|
63 |
+
"legacy": false,
|
64 |
"model_max_length": 1000000000000000019884624838656,
|
65 |
"pad_token": "<|im_end|>",
|
66 |
"sp_model_kwargs": {},
|