Slowin committed on
Commit df47832 · verified · 1 Parent(s): 0b7cc6e

Upload tokenizer

Upload version v18.0 (Trained with Unsloth)

added_tokens.json CHANGED
@@ -1,3 +1,40 @@
 {
-  "<image_soft_token>": 262144
+  "#Address#": 151672,
+  "#CarNumber#": 151673,
+  "#CardNumber#": 151674,
+  "#DateOfBirth#": 151675,
+  "#Email#": 151676,
+  "#PassportNumber#": 151677,
+  "#Person#": 151678,
+  "#Person1#": 151665,
+  "#Person2#": 151666,
+  "#Person3#": 151667,
+  "#Person4#": 151668,
+  "#Person5#": 151669,
+  "#Person6#": 151670,
+  "#Person7#": 151671,
+  "#PhoneNumber#": 151679,
+  "#SSN#": 151680,
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
 }
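
As a quick sanity check, the new entries in added_tokens.json should resolve to their fixed ids once the tokenizer is loaded. This is a minimal sketch, not from this repo's docs; the repo id is a placeholder, since it is not shown in this diff.

from transformers import AutoTokenizer

# "Slowin/model-v18" is a hypothetical repo id; the revision pins this commit.
tokenizer = AutoTokenizer.from_pretrained("Slowin/model-v18", revision="df47832")

# Added tokens map to the fixed ids recorded in added_tokens.json above.
print(tokenizer.convert_tokens_to_ids("#Person1#"))   # expected: 151665
print(tokenizer.convert_tokens_to_ids("<|im_end|>"))  # expected: 151645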
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -1,30 +1,28 @@
 {
-  "boi_token": "<start_of_image>",
-  "bos_token": {
-    "content": "<bos>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eoi_token": "<end_of_image>",
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
   "eos_token": {
-    "content": "<end_of_turn>",
+    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
-  "image_token": "<image_soft_token>",
   "pad_token": {
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "<unk>",
+    "content": "<|vision_pad|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
-size 33384568
+oid sha256:aab041e54bd9d8f05e954a5eeea0c1619f88b2390ed097dd02f2da81b8f5cbe7
+size 11424885
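
tokenizer.json itself is stored in Git LFS, so the diff above only changes the pointer file. A minimal sketch for verifying a local download against the new pointer, using only the Python standard library; the local path is an assumption.

import hashlib
import os

path = "tokenizer.json"  # local path of the file fetched from LFS (assumed)
digest = hashlib.sha256(open(path, "rb").read()).hexdigest()

# Expected values taken from the updated LFS pointer above.
assert digest == "aab041e54bd9d8f05e954a5eeea0c1619f88b2390ed097dd02f2da81b8f5cbe7"
assert os.path.getsize(path) == 11424885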
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
vocab.json ADDED
The diff for this file is too large to render. See raw diff