jw2yang committed
Commit 71c922f · verified · 1 Parent(s): 58f2b4f

Upload processing_magma.py

Files changed (1)
  1. processing_magma.py +142 -0
processing_magma.py ADDED
@@ -0,0 +1,142 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Processor class for Magma.
+"""
+
+from typing import List, Optional, Union
+
+from transformers.feature_extraction_utils import BatchFeature
+from transformers.image_utils import ImageInput
+from transformers.processing_utils import ProcessorMixin
+from transformers.tokenization_utils_base import PaddingStrategy, TextInput, TruncationStrategy
+from transformers.utils import TensorType
+
+from .configuration_magma import MagmaConfig
+
+
+class MagmaProcessor(ProcessorMixin):
+    r"""
+    Constructs a Magma processor which wraps a Magma image processor and a Llama tokenizer into a single processor.
+
+    [`MagmaProcessor`] offers all the functionalities of [`MagmaImageProcessor`] and [`LlamaTokenizerFast`]. See the
+    [`~MagmaProcessor.__call__`] and [`~MagmaProcessor.decode`] for more information.
+
+    Args:
+        image_processor ([`MagmaImageProcessor`], *optional*):
+            The image processor is a required input.
+        tokenizer ([`LlamaTokenizerFast`], *optional*):
+            The tokenizer is a required input.
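+
+    Example (a minimal usage sketch, not part of the original file; the checkpoint id and
+    prompt are illustrative placeholders):
+
+    ```python
+    >>> import requests
+    >>> from PIL import Image
+    >>> from transformers import AutoProcessor
+
+    >>> # hypothetical checkpoint id; substitute the actual Magma checkpoint
+    >>> processor = AutoProcessor.from_pretrained("<magma-checkpoint>", trust_remote_code=True)
+    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+    >>> image = Image.open(requests.get(url, stream=True).raw)
+    >>> inputs = processor(texts="What is shown in this image?", images=image)
+    >>> sorted(inputs.keys())  # doctest: +SKIP
+    ```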
+    """
+
+    attributes = ["image_processor", "tokenizer"]
+    image_processor_class = "AutoImageProcessor"
+    tokenizer_class = "AutoTokenizer"
+
+    def __init__(self, image_processor=None, tokenizer=None):
+        # NOTE: `super().__init__(image_processor, tokenizer)` is not called here;
+        # assigning the attributes directly bypasses ProcessorMixin's
+        # attribute-class validation.
+        self.image_processor = image_processor
+        self.tokenizer = tokenizer
+
+    def __call__(
+        self,
+        texts: Union[TextInput, List[TextInput]],
+        images: Optional[Union[ImageInput, List[ImageInput]]] = None,
+        padding: Union[bool, str, PaddingStrategy] = False,
+        truncation: Optional[Union[bool, str, TruncationStrategy]] = None,
+        max_length: Optional[int] = None,
+        do_pad: Optional[bool] = False,
+        return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
+    ) -> BatchFeature:
+        """
+        Main method to prepare one or several sequence(s) and image(s) for the model. This method forwards the
+        `texts` and `kwargs` arguments to LlamaTokenizerFast's [`~LlamaTokenizerFast.__call__`] if `texts` is not
+        `None` to encode the text. To prepare the image(s), this method forwards the `images` and `kwargs`
+        arguments to MagmaImageProcessor's [`~MagmaImageProcessor.__call__`] if `images` is not `None`. Please
+        refer to the docstring of the above two methods for more information.
+
+        Args:
+            texts (`str`, `List[str]`, `List[List[str]]`):
+                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
+                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
+                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
+            images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`, *optional*):
+                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
+                tensor. Both channels-first and channels-last formats are supported.
+            padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
+                Select a strategy to pad the returned sequences (according to the model's padding side and padding
+                index) among:
+                - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
+                  sequence is provided).
+                - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
+                  acceptable input length for the model if that argument is not provided.
+                - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
+                  different lengths).
+            max_length (`int`, *optional*):
+                Maximum length of the returned list and optionally padding length (see above).
+            do_pad (`bool`, *optional*, defaults to `False`):
+                Whether to pad the image. If `True`, will pad the images in the batch to the largest image in the
+                batch and create a pixel mask. Padding will be applied to the bottom and right of the image with
+                zeros.
+            truncation (`bool`, *optional*):
+                Activates truncation to cut input sequences longer than `max_length` to `max_length`.
+            return_tensors (`str` or [`~utils.TensorType`], *optional*):
+                If set, will return tensors of a particular framework. Acceptable values are:
+
+                - `'tf'`: Return TensorFlow `tf.constant` objects.
+                - `'pt'`: Return PyTorch `torch.Tensor` objects.
+                - `'np'`: Return NumPy `np.ndarray` objects.
+                - `'jax'`: Return JAX `jnp.ndarray` objects.
+
+        Returns:
+            [`BatchFeature`]: A [`BatchFeature`] with the following fields:
+
+            - **input_ids** -- List of token ids to be fed to a model. Returned when `texts` is not `None`.
+            - **attention_mask** -- List of indices specifying which tokens should be attended to by the model
+              (when `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if
+              `texts` is not `None`).
+            - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
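+
+        Example (an illustrative sketch, not from the original file; assumes `processor`,
+        `image1` and `image2` are built as in the class-level example, and that the
+        tokenizer has a pad token so `padding=True` is valid):
+
+        ```python
+        >>> batch = processor(
+        ...     texts=["Describe the image.", "Is there a cat in the image?"],
+        ...     images=[image1, image2],
+        ...     padding=True,
+        ...     return_tensors="pt",
+        ... )
+        >>> sorted(batch.keys())  # doctest: +SKIP
+        ```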
+        """
+        if images is not None:
+            image_inputs = self.image_processor(images, do_pad=do_pad, return_tensors=return_tensors)
+        else:
+            image_inputs = {}
+        text_inputs = self.tokenizer(
+            texts, return_tensors=return_tensors, padding=padding, truncation=truncation, max_length=max_length
+        )
+
+        return BatchFeature(data={**text_inputs, **image_inputs})
+
+    # Copied from transformers.models.clip.processing_clip.CLIPProcessor.batch_decode with CLIP->Llama
+    def batch_decode(self, *args, **kwargs):
+        """
+        This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`].
+        Please refer to the docstring of this method for more information.
+        """
+        return self.tokenizer.batch_decode(*args, **kwargs)
+
+    # Copied from transformers.models.clip.processing_clip.CLIPProcessor.decode with CLIP->Llama
+    def decode(self, *args, **kwargs):
+        """
+        This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please
+        refer to the docstring of this method for more information.
+        """
+        return self.tokenizer.decode(*args, **kwargs)
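+
+    # Illustrative sketch (an assumption, not part of the original file): given ids
+    # produced by a Magma model's `generate` call, the forwarding methods above recover
+    # the text, e.g.:
+    #   processor.batch_decode(generated_ids, skip_special_tokens=True)[0]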
+
+    @property
+    # Copied from transformers.models.clip.processing_clip.CLIPProcessor.model_input_names
+    def model_input_names(self):
+        tokenizer_input_names = self.tokenizer.model_input_names
+        image_processor_input_names = self.image_processor.model_input_names
+        return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
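+
+# Illustrative note (an assumption, not part of the original file): `model_input_names`
+# merges and de-duplicates the two components' input names while preserving order,
+# typically yielding something like:
+#   tokenizer.model_input_names       -> ["input_ids", "attention_mask"]
+#   image_processor.model_input_names -> ["pixel_values"]
+#   processor.model_input_names       -> ["input_ids", "attention_mask", "pixel_values"]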