danielhanchen
committed on
Commit
•
21064a3
1
Parent(s):
78b34bf
Upload tokenizer
Browse files
- tokenizer.json +1 -0
- tokenizer_config.json +2 -1
tokenizer.json
CHANGED
@@ -134,6 +134,7 @@
|
|
134 |
"end_of_word_suffix": null,
|
135 |
"fuse_unk": true,
|
136 |
"byte_fallback": true,
|
|
|
137 |
"vocab": {
|
138 |
"<unk>": 0,
|
139 |
"<s>": 1,
|
|
|
134 |
"end_of_word_suffix": null,
|
135 |
"fuse_unk": true,
|
136 |
"byte_fallback": true,
|
137 |
+
"ignore_merges": false,
|
138 |
"vocab": {
|
139 |
"<unk>": 0,
|
140 |
"<s>": 1,
|
tokenizer_config.json
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
{
|
2 |
"add_bos_token": true,
|
3 |
"add_eos_token": false,
|
|
|
4 |
"added_tokens_decoder": {
|
5 |
"0": {
|
6 |
"content": "<unk>",
|
@@ -34,7 +35,7 @@
|
|
34 |
"legacy": false,
|
35 |
"model_max_length": 2048,
|
36 |
"pad_token": "<unk>",
|
37 |
-
"padding_side": "right",
|
38 |
"sp_model_kwargs": {},
|
39 |
"tokenizer_class": "LlamaTokenizer",
|
40 |
"unk_token": "<unk>",
|
|
|
1 |
{
|
2 |
"add_bos_token": true,
|
3 |
"add_eos_token": false,
|
4 |
+
"add_prefix_space": null,
|
5 |
"added_tokens_decoder": {
|
6 |
"0": {
|
7 |
"content": "<unk>",
|
|
|
35 |
"legacy": false,
|
36 |
"model_max_length": 2048,
|
37 |
"pad_token": "<unk>",
|
38 |
+
"padding_side": "left",
|
39 |
"sp_model_kwargs": {},
|
40 |
"tokenizer_class": "LlamaTokenizer",
|
41 |
"unk_token": "<unk>",
|