stefan-it committed on
Commit
3021b3d
·
1 Parent(s): db00698

tokenizer: add new post processor: </s> is prepended to input sequence

Browse files
Files changed (1) hide show
  1. tokenizer.json +41 -5
tokenizer.json CHANGED
@@ -57,10 +57,46 @@
57
  "use_regex": true
58
  },
59
  "post_processor": {
60
- "type": "ByteLevel",
61
- "add_prefix_space": true,
62
- "trim_offsets": false,
63
- "use_regex": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  },
65
  "decoder": {
66
  "type": "ByteLevel",
@@ -100334,4 +100370,4 @@
100334
  "rie gel"
100335
  ]
100336
  }
100337
- }
 
57
  "use_regex": true
58
  },
59
  "post_processor": {
60
+ "type": "TemplateProcessing",
61
+ "single": [
62
+ {
63
+ "SpecialToken": {
64
+ "id": "</s>",
65
+ "type_id": 0
66
+ }
67
+ },
68
+ {
69
+ "Sequence": {
70
+ "id": "A",
71
+ "type_id": 0
72
+ }
73
+ }
74
+ ],
75
+ "pair": [
76
+ {
77
+ "Sequence": {
78
+ "id": "A",
79
+ "type_id": 0
80
+ }
81
+ },
82
+ {
83
+ "Sequence": {
84
+ "id": "B",
85
+ "type_id": 1
86
+ }
87
+ }
88
+ ],
89
+ "special_tokens": {
90
+ "</s>": {
91
+ "id": "</s>",
92
+ "ids": [
93
+ 2
94
+ ],
95
+ "tokens": [
96
+ "</s>"
97
+ ]
98
+ }
99
+ }
100
  },
101
  "decoder": {
102
  "type": "ByteLevel",
 
100370
  "rie gel"
100371
  ]
100372
  }
100373
+ }