Molbap HF Staff committed on
Commit
421084f
·
verified ·
1 Parent(s): f20cabf

Upload processor

Browse files
added_tokens.json CHANGED
@@ -8,6 +8,7 @@
8
  "<|im_end|>": 151645,
9
  "<|im_start|>": 151644,
10
  "<|image|>": 152068,
 
11
  "|<EXTRA_TOKENS_0>|": 151646,
12
  "|<EXTRA_TOKENS_100>|": 151746,
13
  "|<EXTRA_TOKENS_101>|": 151747,
 
8
  "<|im_end|>": 151645,
9
  "<|im_start|>": 151644,
10
  "<|image|>": 152068,
11
+ "<|pad|>": 152070,
12
  "|<EXTRA_TOKENS_0>|": 151646,
13
  "|<EXTRA_TOKENS_100>|": 151746,
14
  "|<EXTRA_TOKENS_101>|": 151747,
chat_template.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {{ bos_token or '' }}{% for message in messages %}{%- if (loop.index % 2 == 1 and message['role'] != 'user') or (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{%- endif -%}{% if message['content'] is not string %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<image> ' }}{% endif %}{% endfor %}{% endif %}{{ message['role'].capitalize() + ': ' }}{% if message['content'] is string %}{{ message['content'] + ' ' }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'text' %}{{ content['text'] + ' ' }}{% endif %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}
preprocessor_config.json CHANGED
@@ -11,6 +11,10 @@
11
  "crop_size": 336,
12
  "crop_window_patches": 16,
13
  "crop_window_size": 224,
 
 
 
 
14
  "do_convert_rgb": true,
15
  "do_normalize": true,
16
  "do_pad": true,
@@ -36,6 +40,7 @@
36
  ],
37
  "image_token_length_h": 12,
38
  "image_token_length_w": 12,
 
39
  "max_crops": 12,
40
  "max_num_crops": 12,
41
  "overlap_margins": [
@@ -49,6 +54,7 @@
49
  "processor_class": "MolmoProcessor",
50
  "resample": 2,
51
  "rescale_factor": 0.00392156862745098,
 
52
  "size": {
53
  "height": 336,
54
  "width": 336
 
11
  "crop_size": 336,
12
  "crop_window_patches": 16,
13
  "crop_window_size": 224,
14
+ "data_format": "channels_first",
15
+ "default_to_square": true,
16
+ "device": null,
17
+ "do_center_crop": null,
18
  "do_convert_rgb": true,
19
  "do_normalize": true,
20
  "do_pad": true,
 
40
  ],
41
  "image_token_length_h": 12,
42
  "image_token_length_w": 12,
43
+ "input_data_format": null,
44
  "max_crops": 12,
45
  "max_num_crops": 12,
46
  "overlap_margins": [
 
54
  "processor_class": "MolmoProcessor",
55
  "resample": 2,
56
  "rescale_factor": 0.00392156862745098,
57
+ "return_tensors": null,
58
  "size": {
59
  "height": 336,
60
  "width": 336
special_tokens_map.json CHANGED
@@ -425,13 +425,7 @@
425
  "<|image|>"
426
  ],
427
  "boi_token": "<im_start>",
428
- "bos_token": {
429
- "content": "<|endoftext|>",
430
- "lstrip": false,
431
- "normalized": false,
432
- "rstrip": false,
433
- "single_word": false
434
- },
435
  "eoi_token": "<im_end>",
436
  "eos_token": {
437
  "content": "<|endoftext|>",
@@ -443,11 +437,5 @@
443
  "im_col_token": "<im_col>",
444
  "im_patch_token": "<im_patch>",
445
  "image_token": "<image>",
446
- "pad_token": {
447
- "content": "<|endoftext|>",
448
- "lstrip": false,
449
- "normalized": false,
450
- "rstrip": false,
451
- "single_word": false
452
- }
453
  }
 
425
  "<|image|>"
426
  ],
427
  "boi_token": "<im_start>",
428
+ "bos_token": "<|endoftext|>",
 
 
 
 
 
 
429
  "eoi_token": "<im_end>",
430
  "eos_token": {
431
  "content": "<|endoftext|>",
 
437
  "im_col_token": "<im_col>",
438
  "im_patch_token": "<im_patch>",
439
  "image_token": "<image>",
440
+ "pad_token": "<|pad|>"
 
 
 
 
 
 
441
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec9836c15b34f4d6e1c2324574589d9886de43d5ba3339b27c5be18ed8a9c0df
3
- size 11501616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e12d99cec7795d0d3dd206aa62255db4c8c6a1ddf644fc2b304703b1c34a29d
3
+ size 11501800
tokenizer_config.json CHANGED
@@ -3416,6 +3416,14 @@
3416
  "rstrip": false,
3417
  "single_word": false,
3418
  "special": true
 
 
 
 
 
 
 
 
3419
  }
3420
  },
3421
  "additional_special_tokens": [
@@ -3848,7 +3856,6 @@
3848
  },
3849
  "boi_token": "<im_start>",
3850
  "bos_token": "<|endoftext|>",
3851
- "chat_template": "{% for message in messages -%}\n {%- if (loop.index % 2 == 1 and message['role'] != 'user') or \n (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif -%}\n {{ message['role'].capitalize() + ': ' + message['content'] }}\n {%- if not loop.last -%}\n {{ ' ' }}\n {%- endif %}\n {%- endfor -%}\n {%- if add_generation_prompt -%}\n {{ ' Assistant:' }}\n {%- endif %}",
3852
  "clean_up_tokenization_spaces": false,
3853
  "eoi_token": "<im_end>",
3854
  "eos_token": "<|endoftext|>",
@@ -3864,7 +3871,7 @@
3864
  "im_patch_token": "<im_patch>",
3865
  "image_token": "<image>",
3866
  "model_max_length": 32768,
3867
- "pad_token": "<|endoftext|>",
3868
  "processor_class": "MolmoProcessor",
3869
  "split_special_tokens": false,
3870
  "tokenizer_class": "Qwen2Tokenizer",
 
3416
  "rstrip": false,
3417
  "single_word": false,
3418
  "special": true
3419
+ },
3420
+ "152070": {
3421
+ "content": "<|pad|>",
3422
+ "lstrip": false,
3423
+ "normalized": false,
3424
+ "rstrip": false,
3425
+ "single_word": false,
3426
+ "special": true
3427
  }
3428
  },
3429
  "additional_special_tokens": [
 
3856
  },
3857
  "boi_token": "<im_start>",
3858
  "bos_token": "<|endoftext|>",
 
3859
  "clean_up_tokenization_spaces": false,
3860
  "eoi_token": "<im_end>",
3861
  "eos_token": "<|endoftext|>",
 
3871
  "im_patch_token": "<im_patch>",
3872
  "image_token": "<image>",
3873
  "model_max_length": 32768,
3874
+ "pad_token": "<|pad|>",
3875
  "processor_class": "MolmoProcessor",
3876
  "split_special_tokens": false,
3877
  "tokenizer_class": "Qwen2Tokenizer",