Adjust max length in tokenizer config
Browse files
Update the tokenizer configuration so that its max length matches `config.json`. This change does not affect the tokenizer's output; it only prevents incorrect warnings for sequences exceeding 1024 tokens.
- tokenizer_config.json +2 -2
tokenizer_config.json
CHANGED
@@ -46,8 +46,8 @@
|
|
46 |
"do_basic_tokenize": true,
|
47 |
"do_lower_case": false,
|
48 |
"mask_token": "[MASK]",
|
49 |
-
"max_len":
|
50 |
-
"model_max_length":
|
51 |
"never_split": null,
|
52 |
"pad_token": "[PAD]",
|
53 |
"sep_token": "[SEP]",
|
|
|
46 |
"do_basic_tokenize": true,
|
47 |
"do_lower_case": false,
|
48 |
"mask_token": "[MASK]",
|
49 |
+
"max_len": 8192,
|
50 |
+
"model_max_length": 8192,
|
51 |
"never_split": null,
|
52 |
"pad_token": "[PAD]",
|
53 |
"sep_token": "[SEP]",
|