Slowin committed on
Commit
a8fad9c
·
verified ·
1 Parent(s): 14bc2a5

Upload tokenizer Upload version v18.0 (Trained with Unsloth)

Browse files
added_tokens.json CHANGED
@@ -1,40 +1,3 @@
1
  {
2
- "#Address#": 151672,
3
- "#CarNumber#": 151673,
4
- "#CardNumber#": 151674,
5
- "#DateOfBirth#": 151675,
6
- "#Email#": 151676,
7
- "#PassportNumber#": 151677,
8
- "#Person#": 151678,
9
- "#Person1#": 151665,
10
- "#Person2#": 151666,
11
- "#Person3#": 151667,
12
- "#Person4#": 151668,
13
- "#Person5#": 151669,
14
- "#Person6#": 151670,
15
- "#Person7#": 151671,
16
- "#PhoneNumber#": 151679,
17
- "#SSN#": 151680,
18
- "</tool_call>": 151658,
19
- "<tool_call>": 151657,
20
- "<|box_end|>": 151649,
21
- "<|box_start|>": 151648,
22
- "<|endoftext|>": 151643,
23
- "<|file_sep|>": 151664,
24
- "<|fim_middle|>": 151660,
25
- "<|fim_pad|>": 151662,
26
- "<|fim_prefix|>": 151659,
27
- "<|fim_suffix|>": 151661,
28
- "<|im_end|>": 151645,
29
- "<|im_start|>": 151644,
30
- "<|image_pad|>": 151655,
31
- "<|object_ref_end|>": 151647,
32
- "<|object_ref_start|>": 151646,
33
- "<|quad_end|>": 151651,
34
- "<|quad_start|>": 151650,
35
- "<|repo_name|>": 151663,
36
- "<|video_pad|>": 151656,
37
- "<|vision_end|>": 151653,
38
- "<|vision_pad|>": 151654,
39
- "<|vision_start|>": 151652
40
  }
 
1
  {
2
+ "<image_soft_token>": 262144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  }
special_tokens_map.json CHANGED
@@ -1,28 +1,30 @@
1
  {
2
- "additional_special_tokens": [
3
- "<|im_start|>",
4
- "<|im_end|>",
5
- "<|object_ref_start|>",
6
- "<|object_ref_end|>",
7
- "<|box_start|>",
8
- "<|box_end|>",
9
- "<|quad_start|>",
10
- "<|quad_end|>",
11
- "<|vision_start|>",
12
- "<|vision_end|>",
13
- "<|vision_pad|>",
14
- "<|image_pad|>",
15
- "<|video_pad|>"
16
- ],
17
  "eos_token": {
18
- "content": "<|endoftext|>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
22
  "single_word": false
23
  },
 
24
  "pad_token": {
25
- "content": "<|vision_pad|>",
 
 
 
 
 
 
 
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
 
1
  {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
 
 
 
 
 
 
11
  "eos_token": {
12
+ "content": "<end_of_turn>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false
17
  },
18
+ "image_token": "<image_soft_token>",
19
  "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
  "lstrip": false,
29
  "normalized": false,
30
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aab041e54bd9d8f05e954a5eeea0c1619f88b2390ed097dd02f2da81b8f5cbe7
3
- size 11424885
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
3
+ size 33384568
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff