teowu committed on
Commit
9bf90d7
·
verified ·
1 Parent(s): 19ecf67

Upload tokenizer_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +5 -27
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "add_bos_token": false,
3
  "add_eos_token": false,
 
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -9,38 +10,15 @@
9
  "rstrip": false,
10
  "single_word": false,
11
  "special": true
12
- },
13
- "1": {
14
- "content": "<|startoftext|>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false,
19
- "special": true
20
- },
21
- "2": {
22
- "content": "<|endoftext|>",
23
- "lstrip": false,
24
- "normalized": false,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": true
28
  }
29
  },
30
- "bos_token": "<|startoftext|>",
 
31
  "clean_up_tokenization_spaces": false,
32
- "eos_token": "<|endoftext|>",
33
  "legacy": true,
34
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}{% elif message['content'] is iterable %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<fim_prefix><|img|><fim_suffix>{% endif %}{% endfor %}{% endif %}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
35
  "model_max_length": 1000000000000000019884624838656,
36
- "pad_token": {
37
- "__type": "AddedToken",
38
- "content": "<unk>",
39
- "lstrip": false,
40
- "normalized": true,
41
- "rstrip": false,
42
- "single_word": false
43
- },
44
  "sp_model_kwargs": {},
45
  "spaces_between_special_tokens": false,
46
  "tokenizer_class": "LlamaTokenizer",
 
1
  {
2
  "add_bos_token": false,
3
  "add_eos_token": false,
4
+ "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
 
10
  "rstrip": false,
11
  "single_word": false,
12
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  }
14
  },
15
+ "bos_token": null,
16
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}{% elif message['content'] is iterable %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<fim_prefix><|img|><fim_suffix>{% endif %}{% endfor %}{% endif %}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
17
  "clean_up_tokenization_spaces": false,
18
+ "eos_token": null,
19
  "legacy": true,
 
20
  "model_max_length": 1000000000000000019884624838656,
21
+ "pad_token": null,
 
 
 
 
 
 
 
22
  "sp_model_kwargs": {},
23
  "spaces_between_special_tokens": false,
24
  "tokenizer_class": "LlamaTokenizer",