Upload processor
Browse files
- added_tokens.json +1 -0
- chat_template.jinja +1 -0
- preprocessor_config.json +6 -0
- special_tokens_map.json +2 -14
- tokenizer.json +2 -2
- tokenizer_config.json +9 -2
added_tokens.json
CHANGED
@@ -8,6 +8,7 @@
|
|
8 |
"<|im_end|>": 151645,
|
9 |
"<|im_start|>": 151644,
|
10 |
"<|image|>": 152068,
|
|
|
11 |
"|<EXTRA_TOKENS_0>|": 151646,
|
12 |
"|<EXTRA_TOKENS_100>|": 151746,
|
13 |
"|<EXTRA_TOKENS_101>|": 151747,
|
|
|
8 |
"<|im_end|>": 151645,
|
9 |
"<|im_start|>": 151644,
|
10 |
"<|image|>": 152068,
|
11 |
+
"<|pad|>": 152070,
|
12 |
"|<EXTRA_TOKENS_0>|": 151646,
|
13 |
"|<EXTRA_TOKENS_100>|": 151746,
|
14 |
"|<EXTRA_TOKENS_101>|": 151747,
|
chat_template.jinja
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{{ bos_token or '' }}{% for message in messages %}{%- if (loop.index % 2 == 1 and message['role'] != 'user') or (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{%- endif -%}{% if message['content'] is not string %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<image> ' }}{% endif %}{% endfor %}{% endif %}{{ message['role'].capitalize() + ': ' }}{% if message['content'] is string %}{{ message['content'] + ' ' }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'text' %}{{ content['text'] + ' ' }}{% endif %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}
|
preprocessor_config.json
CHANGED
@@ -11,6 +11,10 @@
|
|
11 |
"crop_size": 336,
|
12 |
"crop_window_patches": 16,
|
13 |
"crop_window_size": 224,
|
|
|
|
|
|
|
|
|
14 |
"do_convert_rgb": true,
|
15 |
"do_normalize": true,
|
16 |
"do_pad": true,
|
@@ -36,6 +40,7 @@
|
|
36 |
],
|
37 |
"image_token_length_h": 12,
|
38 |
"image_token_length_w": 12,
|
|
|
39 |
"max_crops": 12,
|
40 |
"max_num_crops": 12,
|
41 |
"overlap_margins": [
|
@@ -49,6 +54,7 @@
|
|
49 |
"processor_class": "MolmoProcessor",
|
50 |
"resample": 2,
|
51 |
"rescale_factor": 0.00392156862745098,
|
|
|
52 |
"size": {
|
53 |
"height": 336,
|
54 |
"width": 336
|
|
|
11 |
"crop_size": 336,
|
12 |
"crop_window_patches": 16,
|
13 |
"crop_window_size": 224,
|
14 |
+
"data_format": "channels_first",
|
15 |
+
"default_to_square": true,
|
16 |
+
"device": null,
|
17 |
+
"do_center_crop": null,
|
18 |
"do_convert_rgb": true,
|
19 |
"do_normalize": true,
|
20 |
"do_pad": true,
|
|
|
40 |
],
|
41 |
"image_token_length_h": 12,
|
42 |
"image_token_length_w": 12,
|
43 |
+
"input_data_format": null,
|
44 |
"max_crops": 12,
|
45 |
"max_num_crops": 12,
|
46 |
"overlap_margins": [
|
|
|
54 |
"processor_class": "MolmoProcessor",
|
55 |
"resample": 2,
|
56 |
"rescale_factor": 0.00392156862745098,
|
57 |
+
"return_tensors": null,
|
58 |
"size": {
|
59 |
"height": 336,
|
60 |
"width": 336
|
special_tokens_map.json
CHANGED
@@ -425,13 +425,7 @@
|
|
425 |
"<|image|>"
|
426 |
],
|
427 |
"boi_token": "<im_start>",
|
428 |
-
"bos_token": {
|
429 |
-
"content": "<|endoftext|>",
|
430 |
-
"lstrip": false,
|
431 |
-
"normalized": false,
|
432 |
-
"rstrip": false,
|
433 |
-
"single_word": false
|
434 |
-
},
|
435 |
"eoi_token": "<im_end>",
|
436 |
"eos_token": {
|
437 |
"content": "<|endoftext|>",
|
@@ -443,11 +437,5 @@
|
|
443 |
"im_col_token": "<im_col>",
|
444 |
"im_patch_token": "<im_patch>",
|
445 |
"image_token": "<image>",
|
446 |
-
"pad_token": {
|
447 |
-
"content": "<|endoftext|>",
|
448 |
-
"lstrip": false,
|
449 |
-
"normalized": false,
|
450 |
-
"rstrip": false,
|
451 |
-
"single_word": false
|
452 |
-
}
|
453 |
}
|
|
|
425 |
"<|image|>"
|
426 |
],
|
427 |
"boi_token": "<im_start>",
|
428 |
+
"bos_token": "<|endoftext|>",
|
|
|
|
|
|
|
|
|
|
|
|
|
429 |
"eoi_token": "<im_end>",
|
430 |
"eos_token": {
|
431 |
"content": "<|endoftext|>",
|
|
|
437 |
"im_col_token": "<im_col>",
|
438 |
"im_patch_token": "<im_patch>",
|
439 |
"image_token": "<image>",
|
440 |
+
"pad_token": "<|pad|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
441 |
}
|
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e12d99cec7795d0d3dd206aa62255db4c8c6a1ddf644fc2b304703b1c34a29d
|
3 |
+
size 11501800
|
tokenizer_config.json
CHANGED
@@ -3416,6 +3416,14 @@
|
|
3416 |
"rstrip": false,
|
3417 |
"single_word": false,
|
3418 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3419 |
}
|
3420 |
},
|
3421 |
"additional_special_tokens": [
|
@@ -3848,7 +3856,6 @@
|
|
3848 |
},
|
3849 |
"boi_token": "<im_start>",
|
3850 |
"bos_token": "<|endoftext|>",
|
3851 |
-
"chat_template": "{% for message in messages -%}\n {%- if (loop.index % 2 == 1 and message['role'] != 'user') or \n (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif -%}\n {{ message['role'].capitalize() + ': ' + message['content'] }}\n {%- if not loop.last -%}\n {{ ' ' }}\n {%- endif %}\n {%- endfor -%}\n {%- if add_generation_prompt -%}\n {{ ' Assistant:' }}\n {%- endif %}",
|
3852 |
"clean_up_tokenization_spaces": false,
|
3853 |
"eoi_token": "<im_end>",
|
3854 |
"eos_token": "<|endoftext|>",
|
@@ -3864,7 +3871,7 @@
|
|
3864 |
"im_patch_token": "<im_patch>",
|
3865 |
"image_token": "<image>",
|
3866 |
"model_max_length": 32768,
|
3867 |
-
"pad_token": "<|endoftext|>",
|
3868 |
"processor_class": "MolmoProcessor",
|
3869 |
"split_special_tokens": false,
|
3870 |
"tokenizer_class": "Qwen2Tokenizer",
|
|
|
3416 |
"rstrip": false,
|
3417 |
"single_word": false,
|
3418 |
"special": true
|
3419 |
+
},
|
3420 |
+
"152070": {
|
3421 |
+
"content": "<|pad|>",
|
3422 |
+
"lstrip": false,
|
3423 |
+
"normalized": false,
|
3424 |
+
"rstrip": false,
|
3425 |
+
"single_word": false,
|
3426 |
+
"special": true
|
3427 |
}
|
3428 |
},
|
3429 |
"additional_special_tokens": [
|
|
|
3856 |
},
|
3857 |
"boi_token": "<im_start>",
|
3858 |
"bos_token": "<|endoftext|>",
|
|
|
3859 |
"clean_up_tokenization_spaces": false,
|
3860 |
"eoi_token": "<im_end>",
|
3861 |
"eos_token": "<|endoftext|>",
|
|
|
3871 |
"im_patch_token": "<im_patch>",
|
3872 |
"image_token": "<image>",
|
3873 |
"model_max_length": 32768,
|
3874 |
+
"pad_token": "<|pad|>",
|
3875 |
"processor_class": "MolmoProcessor",
|
3876 |
"split_special_tokens": false,
|
3877 |
"tokenizer_class": "Qwen2Tokenizer",
|