krd-language-model / tokenizer_config.json
3ed0k4's picture
Update tokenizer_config.json
81e8963 verified
{
  "tokenizer_class": "PreTrainedTokenizerFast",
  "model_max_length": 2048,
  "padding_side": "right",
  "pad_token": "<pad>",
  "unk_token": "<unk>",
  "bos_token": "<s>",
  "eos_token": "</s>",
  "special_tokens": {
    "pad_token": "<pad>",
    "unk_token": "<unk>",
    "bos_token": "<s>",
    "eos_token": "</s>"
  },
  "model_file": "krd.model",
  "vocab_file": "krd.vocab",
  "clean_up_tokenization_spaces": true
}