SalomonMetre13 committed on
Commit
b874234
·
verified ·
1 Parent(s): dc15cdc

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +7 -0
tokenizer_config.json CHANGED
@@ -1884,12 +1884,19 @@
1884
  "extra_special_tokens": {},
1885
  "legacy_behaviour": false,
1886
  "mask_token": "<mask>",
 
1887
  "model_max_length": 1024,
 
1888
  "pad_token": "<pad>",
 
 
1889
  "sep_token": "</s>",
1890
  "sp_model_kwargs": {},
1891
  "src_lang": "eng_Latn",
 
1892
  "tgt_lang": null,
1893
  "tokenizer_class": "NllbTokenizer",
 
 
1894
  "unk_token": "<unk>"
1895
  }
 
1884
  "extra_special_tokens": {},
1885
  "legacy_behaviour": false,
1886
  "mask_token": "<mask>",
1887
+ "max_length": 256,
1888
  "model_max_length": 1024,
1889
+ "pad_to_multiple_of": null,
1890
  "pad_token": "<pad>",
1891
+ "pad_token_type_id": 0,
1892
+ "padding_side": "right",
1893
  "sep_token": "</s>",
1894
  "sp_model_kwargs": {},
1895
  "src_lang": "eng_Latn",
1896
+ "stride": 0,
1897
  "tgt_lang": null,
1898
  "tokenizer_class": "NllbTokenizer",
1899
+ "truncation_side": "right",
1900
+ "truncation_strategy": "longest_first",
1901
  "unk_token": "<unk>"
1902
  }