lunahr committed on
Commit
dc71344
·
1 Parent(s): 34232f9

Apply @unsloth tokenizer fixes

Browse files
README.md CHANGED
@@ -30,6 +30,7 @@ pipeline_tag: text-generation
30
  tags:
31
  - nlp
32
  - code
 
33
  widget:
34
  - messages:
35
  - role: user
 
30
  tags:
31
  - nlp
32
  - code
33
+ - abliterated
34
  widget:
35
  - messages:
36
  - role: user
generation_config.json CHANGED
@@ -1,10 +1,7 @@
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 199999,
4
- "eos_token_id": [
5
- 200020,
6
- 199999
7
- ],
8
- "pad_token_id": 199999,
9
  "transformers_version": "4.45.0"
10
- }
 
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 199999,
4
+ "eos_token_id": 200020,
5
+ "pad_token_id": 200029,
 
 
 
6
  "transformers_version": "4.45.0"
7
+ }
special_tokens_map.json CHANGED
@@ -7,21 +7,14 @@
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|endoftext|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
- "content": "<|endoftext|>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "unk_token": {
24
- "content": "<|endoftext|>",
25
  "lstrip": false,
26
  "normalized": false,
27
  "rstrip": false,
 
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|end|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
+ "content": "<|PAD_TOKEN|>",
 
 
 
 
 
 
 
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -98,14 +98,24 @@
98
  "rstrip": true,
99
  "single_word": false,
100
  "special": true
 
 
 
 
 
 
 
 
101
  }
102
  },
103
  "bos_token": "<|endoftext|>",
104
- "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}",
105
  "clean_up_tokenization_spaces": false,
106
- "eos_token": "<|endoftext|>",
 
107
  "model_max_length": 131072,
108
- "pad_token": "<|endoftext|>",
 
109
  "tokenizer_class": "GPT2Tokenizer",
110
- "unk_token": "<|endoftext|>"
111
  }
 
98
  "rstrip": true,
99
  "single_word": false,
100
  "special": true
101
+ },
102
+ "200029": {
103
+ "content": "<|PAD▁TOKEN|>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": false,
107
+ "single_word": false,
108
+ "special": true
109
  }
110
  },
111
  "bos_token": "<|endoftext|>",
112
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% endif %}",
113
  "clean_up_tokenization_spaces": false,
114
+ "eos_token": "<|end|>",
115
+ "extra_special_tokens": {},
116
  "model_max_length": 131072,
117
+ "pad_token": "<|PAD_TOKEN|>",
118
+ "padding_side": "left",
119
  "tokenizer_class": "GPT2Tokenizer",
120
+ "unk_token": "�"
121
  }