Add normalize method to MiniCPMVProcessor

Files changed (2) hide show

image_processing_minicpmv.py CHANGED Viewed

@@ -395,8 +395,8 @@ class MiniCPMVImageProcessor(BaseImageProcessor):
                 image_patches = [
                     self.normalize(
                         image=image,
-                        mean=np.array(self.mean).flatten(),   # Flatten the mean array
-                        std=np.array(self.std).flatten(),     # Flatten the std array
                         input_data_format=input_data_format
                     )
                     for image in image_patches

                 image_patches = [
                     self.normalize(
                         image=image,
+                        mean=self.mean,
+                        std=self.std,
                         input_data_format=input_data_format
                     )
                     for image in image_patches

processing_minicpmv.py CHANGED Viewed

@@ -25,9 +25,11 @@ from transformers.image_utils import ImageInput
 from transformers.processing_utils import ProcessorMixin
 from transformers.tokenization_utils_base import PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
 from transformers.utils import TensorType, requires_backends, is_torch_dtype, is_torch_device
-from .image_processing_minicpmv import MiniCPMVBatchFeature
 class MiniCPMVProcessor(ProcessorMixin):
     r"""
@@ -49,6 +51,17 @@ class MiniCPMVProcessor(ProcessorMixin):
     def __init__(self, image_processor=None, tokenizer=None):
         """Initialize the processor from an image processor and a tokenizer.

         Both arguments are forwarded unchanged to the parent initializer.
         Although `image_processor` defaults to `None`, its `version` attribute
         is read unconditionally below, so passing `None` fails at that
         attribute access at the latest.
         """
         super().__init__(image_processor, tokenizer)
         # Mirror the image processor's version on the processor itself.
         self.version = image_processor.version
     def __call__(
         self,

 from transformers.processing_utils import ProcessorMixin
 from transformers.tokenization_utils_base import PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
 from transformers.utils import TensorType, requires_backends, is_torch_dtype, is_torch_device
+from transformers.image_utils import ChannelDimension
+from .image_processing_minicpmv import MiniCPMVBatchFeature
+import numpy as np
 class MiniCPMVProcessor(ProcessorMixin):
     r"""
     def __init__(self, image_processor=None, tokenizer=None):
         """Initialize the processor from an image processor and a tokenizer.

         Both arguments are forwarded unchanged to the parent initializer.
         Although `image_processor` defaults to `None`, its `version` attribute
         is read unconditionally below, so passing `None` fails at that
         attribute access at the latest.
         """
         super().__init__(image_processor, tokenizer)
         # Mirror the image processor's version on the processor itself.
         self.version = image_processor.version
+    def normalize(self, image, mean, std, data_format=None, input_data_format=None, **kwargs):
+        """
+        Normalize `image` channel-wise as `(image - mean) / std`.
+
+        Args:
+            image: Object supporting broadcasting arithmetic and `.T`
+                (presumably a `np.ndarray` image patch -- TODO confirm against callers).
+            mean: Per-channel means; any nesting is flattened to 1D first.
+            std: Per-channel standard deviations; flattened to 1D like `mean`.
+            data_format: Accepted but not used by this implementation.
+            input_data_format: Channel layout of `image`. `ChannelDimension.LAST` or
+                `None` applies the statistics against the trailing axis directly;
+                any other value takes the transposed path below.
+            **kwargs: Accepted and ignored.
+
+        Returns:
+            The normalized image, with the same axis order as the input.
+        """
+        # Collapse nested statistics such as [[0.5, 0.5, 0.5]] into flat vectors.
+        flat_mean = np.array(mean).flatten()
+        flat_std = np.array(std).flatten()
+
+        channels_trailing = input_data_format == ChannelDimension.LAST or input_data_format is None
+        if not channels_trailing:
+            # `.T` reverses every axis, which moves a leading channel axis to the
+            # end so the 1D statistics broadcast over it; the outer `.T` restores
+            # the original axis order.
+            swapped = image.T
+            return ((swapped - flat_mean) / flat_std).T
+
+        return (image - flat_mean) / flat_std
     def __call__(
         self,