Upload processor

Browse files

Files changed (6) hide show

added_tokens.json +1 -0
chat_template.jinja +1 -0
preprocessor_config.json +6 -0
special_tokens_map.json +2 -14
tokenizer.json +2 -2
tokenizer_config.json +9 -2

added_tokens.json CHANGED Viewed

@@ -8,6 +8,7 @@
   "<|im_end|>": 151645,
   "<|im_start|>": 151644,
   "<|image|>": 152068,
   "|<EXTRA_TOKENS_0>|": 151646,
   "|<EXTRA_TOKENS_100>|": 151746,
   "|<EXTRA_TOKENS_101>|": 151747,

   "<|im_end|>": 151645,
   "<|im_start|>": 151644,
   "<|image|>": 152068,
+  "<|pad|>": 152070,
   "|<EXTRA_TOKENS_0>|": 151646,
   "|<EXTRA_TOKENS_100>|": 151746,
   "|<EXTRA_TOKENS_101>|": 151747,

chat_template.jinja ADDED Viewed

	@@ -0,0 +1 @@

+ {{ bos_token or '' }}{% for message in messages %}{%- if (loop.index % 2 == 1 and message['role'] != 'user') or (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{%- endif -%}{% if message['content'] is not string %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<image> ' }}{% endif %}{% endfor %}{% endif %}{{ message['role'].capitalize() + ': ' }}{% if message['content'] is string %}{{ message['content'] + ' ' }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'text' %}{{ content['text'] + ' ' }}{% endif %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}

preprocessor_config.json CHANGED Viewed

@@ -11,6 +11,10 @@
   "crop_size": 336,
   "crop_window_patches": 16,
   "crop_window_size": 224,
   "do_convert_rgb": true,
   "do_normalize": true,
   "do_pad": true,
@@ -36,6 +40,7 @@
   ],
   "image_token_length_h": 12,
   "image_token_length_w": 12,
   "max_crops": 12,
   "max_num_crops": 12,
   "overlap_margins": [
@@ -49,6 +54,7 @@
   "processor_class": "MolmoProcessor",
   "resample": 2,
   "rescale_factor": 0.00392156862745098,
   "size": {
     "height": 336,
     "width": 336

   "crop_size": 336,
   "crop_window_patches": 16,
   "crop_window_size": 224,
+  "data_format": "channels_first",
+  "default_to_square": true,
+  "device": null,
+  "do_center_crop": null,
   "do_convert_rgb": true,
   "do_normalize": true,
   "do_pad": true,
   ],
   "image_token_length_h": 12,
   "image_token_length_w": 12,
+  "input_data_format": null,
   "max_crops": 12,
   "max_num_crops": 12,
   "overlap_margins": [
   "processor_class": "MolmoProcessor",
   "resample": 2,
   "rescale_factor": 0.00392156862745098,
+  "return_tensors": null,
   "size": {
     "height": 336,
     "width": 336

special_tokens_map.json CHANGED Viewed

@@ -425,13 +425,7 @@
     "<|image|>"
   ],
   "boi_token": "<im_start>",
-  "bos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
   "eoi_token": "<im_end>",
   "eos_token": {
     "content": "<|endoftext|>",
@@ -443,11 +437,5 @@
   "im_col_token": "<im_col>",
   "im_patch_token": "<im_patch>",
   "image_token": "<image>",
-  "pad_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
 }

     "<|image|>"
   ],
   "boi_token": "<im_start>",
+  "bos_token": "<|endoftext|>",
   "eoi_token": "<im_end>",
   "eos_token": {
     "content": "<|endoftext|>",
   "im_col_token": "<im_col>",
   "im_patch_token": "<im_patch>",
   "image_token": "<image>",
+  "pad_token": "<|pad|>"
 }

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec9836c15b34f4d6e1c2324574589d9886de43d5ba3339b27c5be18ed8a9c0df
-size 11501616

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e12d99cec7795d0d3dd206aa62255db4c8c6a1ddf644fc2b304703b1c34a29d
+size 11501800

tokenizer_config.json CHANGED Viewed

@@ -3416,6 +3416,14 @@
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "additional_special_tokens": [
@@ -3848,7 +3856,6 @@
   },
   "boi_token": "<im_start>",
   "bos_token": "<|endoftext|>",
-  "chat_template": "{% for message in messages -%}\n        {%- if (loop.index % 2 == 1 and message['role'] != 'user') or \n          (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}\n        {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n        {%- endif -%}\n        {{ message['role'].capitalize() + ': ' + message['content'] }}\n        {%- if not loop.last -%}\n        {{ ' ' }}\n        {%- endif %}\n        {%- endfor -%}\n        {%- if add_generation_prompt -%}\n        {{ ' Assistant:' }}\n        {%- endif %}",
   "clean_up_tokenization_spaces": false,
   "eoi_token": "<im_end>",
   "eos_token": "<|endoftext|>",
@@ -3864,7 +3871,7 @@
   "im_patch_token": "<im_patch>",
   "image_token": "<image>",
   "model_max_length": 32768,
-  "pad_token": "<|endoftext|>",
   "processor_class": "MolmoProcessor",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",

       "rstrip": false,
       "single_word": false,
       "special": true
+    },
+    "152070": {
+      "content": "<|pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
     }
   },
   "additional_special_tokens": [
   },
   "boi_token": "<im_start>",
   "bos_token": "<|endoftext|>",
   "clean_up_tokenization_spaces": false,
   "eoi_token": "<im_end>",
   "eos_token": "<|endoftext|>",
   "im_patch_token": "<im_patch>",
   "image_token": "<image>",
   "model_max_length": 32768,
+  "pad_token": "<|pad|>",
   "processor_class": "MolmoProcessor",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",