Update preprocessing_molmo.py (#11)
Browse files
- Update preprocessing_molmo.py (8e0256f86bfdb1368afbfd343d8b09bf2fe25d93)
- preprocessing_molmo.py +9 -6
preprocessing_molmo.py
CHANGED
@@ -23,7 +23,7 @@ from transformers.processing_utils import (
|
|
23 |
ProcessorMixin,
|
24 |
)
|
25 |
|
26 |
-
from transformers.tokenization_utils_base import TextInput
|
27 |
from transformers.utils import logging
|
28 |
|
29 |
from transformers import AutoTokenizer
|
@@ -116,6 +116,8 @@ class MolmoProcessor(ProcessorMixin):
|
|
116 |
self,
|
117 |
text: TextInput = None,
|
118 |
images: ImageInput = None,
|
|
|
|
|
119 |
**kwargs: Unpack[MolmoProcessorKwargs],
|
120 |
):
|
121 |
output_kwargs = self._merge_kwargs(
|
@@ -124,11 +126,12 @@ class MolmoProcessor(ProcessorMixin):
|
|
124 |
**kwargs,
|
125 |
)
|
126 |
|
127 |
-
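tokens = self.get_tokens_input(
|
128 |
-
text,
|
129 |
-
output_kwargs["text_kwargs"]["message_format"],
|
130 |
-
output_kwargs["text_kwargs"]["always_start_with_space"],
|
131 |
-
)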
|
|
|
132 |
|
133 |
image_token_id = self.special_token_ids[IMAGE_PROMPT]
|
134 |
|
|
|
23 |
ProcessorMixin,
|
24 |
)
|
25 |
|
26 |
+
from transformers.tokenization_utils_base import TextInput, PreTokenizedInput
|
27 |
from transformers.utils import logging
|
28 |
|
29 |
from transformers import AutoTokenizer
|
|
|
116 |
self,
|
117 |
text: TextInput = None,
|
118 |
images: ImageInput = None,
|
119 |
+
*,
|
120 |
+
tokens: Optional[PreTokenizedInput] = None,
|
121 |
**kwargs: Unpack[MolmoProcessorKwargs],
|
122 |
):
|
123 |
output_kwargs = self._merge_kwargs(
|
|
|
126 |
**kwargs,
|
127 |
)
|
128 |
|
129 |
+
if tokens is None:
|
130 |
+
tokens = self.get_tokens_input(
|
131 |
+
text,
|
132 |
+
output_kwargs["text_kwargs"]["message_format"],
|
133 |
+
output_kwargs["text_kwargs"]["always_start_with_space"],
|
134 |
+
)
|
135 |
|
136 |
image_token_id = self.special_token_ids[IMAGE_PROMPT]
|
137 |
|