julioc-p committed on
Commit
5b5bc15
·
verified ·
1 Parent(s): cbbbe9b

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +6 -16
  2. tokenizer_config.json +1 -1
tokenizer.json CHANGED
@@ -58,23 +58,13 @@
58
  "special": true
59
  }
60
  ],
61
- "normalizer": {
62
- "type": "Sequence",
63
- "normalizers": [
64
- {
65
- "type": "Prepend",
66
- "prepend": "▁"
67
- },
68
- {
69
- "type": "Replace",
70
- "pattern": {
71
- "String": " "
72
- },
73
- "content": "▁"
74
- }
75
- ]
76
  },
77
- "pre_tokenizer": null,
78
  "post_processor": {
79
  "type": "TemplateProcessing",
80
  "single": [
 
58
  "special": true
59
  }
60
  ],
61
+ "normalizer": null,
62
+ "pre_tokenizer": {
63
+ "type": "Metaspace",
64
+ "replacement": "▁",
65
+ "prepend_scheme": "first",
66
+ "split": false
 
 
 
 
 
 
 
 
 
67
  },
 
68
  "post_processor": {
69
  "type": "TemplateProcessing",
70
  "single": [
tokenizer_config.json CHANGED
@@ -60,7 +60,7 @@
60
  "clean_up_tokenization_spaces": false,
61
  "eos_token": "<|im_end|>",
62
  "extra_special_tokens": {},
63
- "legacy": true,
64
  "model_max_length": 1000000000000000019884624838656,
65
  "pad_token": "<|im_end|>",
66
  "sp_model_kwargs": {},
 
60
  "clean_up_tokenization_spaces": false,
61
  "eos_token": "<|im_end|>",
62
  "extra_special_tokens": {},
63
+ "legacy": false,
64
  "model_max_length": 1000000000000000019884624838656,
65
  "pad_token": "<|im_end|>",
66
  "sp_model_kwargs": {},