DRXD1000 committed on
Commit
a36ec7b
·
verified ·
1 Parent(s): 57097d1

Adding Special Token

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +189 -19
  2. tokenizer.json +16 -16
  3. tokenizer_config.json +25 -17
special_tokens_map.json CHANGED
@@ -1,24 +1,194 @@
1
  {
2
  "additional_special_tokens": [
3
- "<|endoftext|>",
4
- "<fim_prefix>",
5
- "<fim_middle>",
6
- "<fim_suffix>",
7
- "<fim_pad>",
8
- "<filename>",
9
- "<gh_stars>",
10
- "<issue_start>",
11
- "<issue_comment>",
12
- "<issue_closed>",
13
- "<jupyter_start>",
14
- "<jupyter_text>",
15
- "<jupyter_code>",
16
- "<jupyter_output>",
17
- "<empty_output>",
18
- "<commit_before>",
19
- "<commit_msg>",
20
- "<commit_after>",
21
- "<reponame>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  ],
23
  "bos_token": {
24
  "content": "<|endoftext|>",
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "<|endoftext|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<fim_prefix>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<fim_middle>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "<fim_suffix>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<fim_pad>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "<filename>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "<gh_stars>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "<issue_start>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "<issue_comment>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "<issue_closed>",
68
+ "lstrip": false,
69
+ "normalized": false,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
+ {
74
+ "content": "<jupyter_start>",
75
+ "lstrip": false,
76
+ "normalized": false,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ },
80
+ {
81
+ "content": "<jupyter_text>",
82
+ "lstrip": false,
83
+ "normalized": false,
84
+ "rstrip": false,
85
+ "single_word": false
86
+ },
87
+ {
88
+ "content": "<jupyter_code>",
89
+ "lstrip": false,
90
+ "normalized": false,
91
+ "rstrip": false,
92
+ "single_word": false
93
+ },
94
+ {
95
+ "content": "<jupyter_output>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false
100
+ },
101
+ {
102
+ "content": "<empty_output>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false
107
+ },
108
+ {
109
+ "content": "<commit_before>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false
114
+ },
115
+ {
116
+ "content": "<commit_msg>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false
121
+ },
122
+ {
123
+ "content": "<commit_after>",
124
+ "lstrip": false,
125
+ "normalized": false,
126
+ "rstrip": false,
127
+ "single_word": false
128
+ },
129
+ {
130
+ "content": "<reponame>",
131
+ "lstrip": false,
132
+ "normalized": false,
133
+ "rstrip": false,
134
+ "single_word": false
135
+ },
136
+ {
137
+ "content": "[INST]",
138
+ "lstrip": false,
139
+ "normalized": false,
140
+ "rstrip": false,
141
+ "single_word": false
142
+ },
143
+ {
144
+ "content": "[/INST]",
145
+ "lstrip": false,
146
+ "normalized": false,
147
+ "rstrip": false,
148
+ "single_word": false
149
+ },
150
+ {
151
+ "content": "[AVAILABLE_TOOLS]",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false
156
+ },
157
+ {
158
+ "content": "[/AVAILABLE_TOOLS]",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false
163
+ },
164
+ {
165
+ "content": "[TOOL_CALLS]",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false
170
+ },
171
+ {
172
+ "content": "[TOOL_RESULTS]",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false
177
+ },
178
+ {
179
+ "content": "[/TOOL_RESULTS]",
180
+ "lstrip": false,
181
+ "normalized": false,
182
+ "rstrip": false,
183
+ "single_word": false
184
+ },
185
+ {
186
+ "content": "<|pad|>",
187
+ "lstrip": false,
188
+ "normalized": false,
189
+ "rstrip": false,
190
+ "single_word": false
191
+ }
192
  ],
193
  "bos_token": {
194
  "content": "<|endoftext|>",
tokenizer.json CHANGED
@@ -180,8 +180,8 @@
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
183
- "normalized": true,
184
- "special": false
185
  },
186
  {
187
  "id": 49153,
@@ -189,8 +189,8 @@
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
192
- "normalized": true,
193
- "special": false
194
  },
195
  {
196
  "id": 49154,
@@ -198,8 +198,8 @@
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
201
- "normalized": true,
202
- "special": false
203
  },
204
  {
205
  "id": 49155,
@@ -207,8 +207,8 @@
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
210
- "normalized": true,
211
- "special": false
212
  },
213
  {
214
  "id": 49156,
@@ -216,8 +216,8 @@
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
219
- "normalized": true,
220
- "special": false
221
  },
222
  {
223
  "id": 49157,
@@ -225,8 +225,8 @@
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
- "normalized": true,
229
- "special": false
230
  },
231
  {
232
  "id": 49158,
@@ -234,8 +234,8 @@
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
237
- "normalized": true,
238
- "special": false
239
  },
240
  {
241
  "id": 49159,
@@ -243,8 +243,8 @@
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
246
- "normalized": true,
247
- "special": false
248
  }
249
  ],
250
  "normalizer": null,
 
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
183
+ "normalized": false,
184
+ "special": true
185
  },
186
  {
187
  "id": 49153,
 
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
192
+ "normalized": false,
193
+ "special": true
194
  },
195
  {
196
  "id": 49154,
 
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
201
+ "normalized": false,
202
+ "special": true
203
  },
204
  {
205
  "id": 49155,
 
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
210
+ "normalized": false,
211
+ "special": true
212
  },
213
  {
214
  "id": 49156,
 
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
219
+ "normalized": false,
220
+ "special": true
221
  },
222
  {
223
  "id": 49157,
 
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
+ "normalized": false,
229
+ "special": true
230
  },
231
  {
232
  "id": 49158,
 
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
237
+ "normalized": false,
238
+ "special": true
239
  },
240
  {
241
  "id": 49159,
 
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
246
+ "normalized": false,
247
+ "special": true
248
  }
249
  ],
250
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -156,66 +156,66 @@
156
  "49152": {
157
  "content": "[INST]",
158
  "lstrip": false,
159
- "normalized": true,
160
  "rstrip": false,
161
  "single_word": false,
162
- "special": false
163
  },
164
  "49153": {
165
  "content": "[/INST]",
166
  "lstrip": false,
167
- "normalized": true,
168
  "rstrip": false,
169
  "single_word": false,
170
- "special": false
171
  },
172
  "49154": {
173
  "content": "[AVAILABLE_TOOLS]",
174
  "lstrip": false,
175
- "normalized": true,
176
  "rstrip": false,
177
  "single_word": false,
178
- "special": false
179
  },
180
  "49155": {
181
  "content": "[/AVAILABLE_TOOLS]",
182
  "lstrip": false,
183
- "normalized": true,
184
  "rstrip": false,
185
  "single_word": false,
186
- "special": false
187
  },
188
  "49156": {
189
  "content": "[TOOL_CALLS]",
190
  "lstrip": false,
191
- "normalized": true,
192
  "rstrip": false,
193
  "single_word": false,
194
- "special": false
195
  },
196
  "49157": {
197
  "content": "[TOOL_RESULTS]",
198
  "lstrip": false,
199
- "normalized": true,
200
  "rstrip": false,
201
  "single_word": false,
202
- "special": false
203
  },
204
  "49158": {
205
  "content": "[/TOOL_RESULTS]",
206
  "lstrip": false,
207
- "normalized": true,
208
  "rstrip": false,
209
  "single_word": false,
210
- "special": false
211
  },
212
  "49159": {
213
  "content": "<|pad|>",
214
  "lstrip": false,
215
- "normalized": true,
216
  "rstrip": false,
217
  "single_word": false,
218
- "special": false
219
  }
220
  },
221
  "additional_special_tokens": [
@@ -237,7 +237,15 @@
237
  "<commit_before>",
238
  "<commit_msg>",
239
  "<commit_after>",
240
- "<reponame>"
 
 
 
 
 
 
 
 
241
  ],
242
  "bos_token": "<|endoftext|>",
243
  "chat_template": "\n{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if loop.index0 == 0 and system_message %}\n {{- \"[INST] \" + system_message + \"\n\n\" + message[\"content\"].rstrip() + \"[/INST]\n\" }}\n {%- else %}\n {{- \"[INST] \" + message[\"content\"].rstrip() + \"[/INST]\n\" }}\n {%- endif %}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- \" \" + message[\"content\"].strip() + eos_token +\"\n\" }}\n {%- endif %}\n{%- endfor %}\n\n{%- if add_generation_prompt %}\n {{ \"\n\" }}\n{%- endif %}\n",
 
156
  "49152": {
157
  "content": "[INST]",
158
  "lstrip": false,
159
+ "normalized": false,
160
  "rstrip": false,
161
  "single_word": false,
162
+ "special": true
163
  },
164
  "49153": {
165
  "content": "[/INST]",
166
  "lstrip": false,
167
+ "normalized": false,
168
  "rstrip": false,
169
  "single_word": false,
170
+ "special": true
171
  },
172
  "49154": {
173
  "content": "[AVAILABLE_TOOLS]",
174
  "lstrip": false,
175
+ "normalized": false,
176
  "rstrip": false,
177
  "single_word": false,
178
+ "special": true
179
  },
180
  "49155": {
181
  "content": "[/AVAILABLE_TOOLS]",
182
  "lstrip": false,
183
+ "normalized": false,
184
  "rstrip": false,
185
  "single_word": false,
186
+ "special": true
187
  },
188
  "49156": {
189
  "content": "[TOOL_CALLS]",
190
  "lstrip": false,
191
+ "normalized": false,
192
  "rstrip": false,
193
  "single_word": false,
194
+ "special": true
195
  },
196
  "49157": {
197
  "content": "[TOOL_RESULTS]",
198
  "lstrip": false,
199
+ "normalized": false,
200
  "rstrip": false,
201
  "single_word": false,
202
+ "special": true
203
  },
204
  "49158": {
205
  "content": "[/TOOL_RESULTS]",
206
  "lstrip": false,
207
+ "normalized": false,
208
  "rstrip": false,
209
  "single_word": false,
210
+ "special": true
211
  },
212
  "49159": {
213
  "content": "<|pad|>",
214
  "lstrip": false,
215
+ "normalized": false,
216
  "rstrip": false,
217
  "single_word": false,
218
+ "special": true
219
  }
220
  },
221
  "additional_special_tokens": [
 
237
  "<commit_before>",
238
  "<commit_msg>",
239
  "<commit_after>",
240
+ "<reponame>",
241
+ "[INST]",
242
+ "[/INST]",
243
+ "[AVAILABLE_TOOLS]",
244
+ "[/AVAILABLE_TOOLS]",
245
+ "[TOOL_CALLS]",
246
+ "[TOOL_RESULTS]",
247
+ "[/TOOL_RESULTS]",
248
+ "<|pad|>"
249
  ],
250
  "bos_token": "<|endoftext|>",
251
  "chat_template": "\n{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if loop.index0 == 0 and system_message %}\n {{- \"[INST] \" + system_message + \"\n\n\" + message[\"content\"].rstrip() + \"[/INST]\n\" }}\n {%- else %}\n {{- \"[INST] \" + message[\"content\"].rstrip() + \"[/INST]\n\" }}\n {%- endif %}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- \" \" + message[\"content\"].strip() + eos_token +\"\n\" }}\n {%- endif %}\n{%- endfor %}\n\n{%- if add_generation_prompt %}\n {{ \"\n\" }}\n{%- endif %}\n",