pranay-ar committed on
Commit
44547fd
·
1 Parent(s): 9b65b78

add norm method

Browse files
image_processing_minicpmv.py CHANGED
@@ -395,8 +395,8 @@ class MiniCPMVImageProcessor(BaseImageProcessor):
395
  image_patches = [
396
  self.normalize(
397
  image=image,
398
- mean=np.array(self.mean).flatten(), # Flatten the mean array
399
- std=np.array(self.std).flatten(), # Flatten the std array
400
  input_data_format=input_data_format
401
  )
402
  for image in image_patches
 
395
  image_patches = [
396
  self.normalize(
397
  image=image,
398
+ mean=self.mean,
399
+ std=self.std,
400
  input_data_format=input_data_format
401
  )
402
  for image in image_patches
processing_minicpmv.py CHANGED
@@ -25,9 +25,11 @@ from transformers.image_utils import ImageInput
25
  from transformers.processing_utils import ProcessorMixin
26
  from transformers.tokenization_utils_base import PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
27
  from transformers.utils import TensorType, requires_backends, is_torch_dtype, is_torch_device
 
28
 
29
- from .image_processing_minicpmv import MiniCPMVBatchFeature
30
 
 
 
31
 
32
  class MiniCPMVProcessor(ProcessorMixin):
33
  r"""
@@ -49,6 +51,17 @@ class MiniCPMVProcessor(ProcessorMixin):
49
  def __init__(self, image_processor=None, tokenizer=None):
50
  super().__init__(image_processor, tokenizer)
51
  self.version = image_processor.version
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  def __call__(
54
  self,
 
25
  from transformers.processing_utils import ProcessorMixin
26
  from transformers.tokenization_utils_base import PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
27
  from transformers.utils import TensorType, requires_backends, is_torch_dtype, is_torch_device
28
+ from transformers.image_utils import ChannelDimension
29
 
 
30
 
31
+ from .image_processing_minicpmv import MiniCPMVBatchFeature
32
+ import numpy as np
33
 
34
  class MiniCPMVProcessor(ProcessorMixin):
35
  r"""
 
51
  def __init__(self, image_processor=None, tokenizer=None):
52
  super().__init__(image_processor, tokenizer)
53
  self.version = image_processor.version
54
+
55
def normalize(self, image, mean, std, data_format=None, input_data_format=None, **kwargs):
    """Normalize ``image`` channel-wise as ``(image - mean) / std``.

    Args:
        image: Array-like image data. It is converted with ``np.asarray``.
        mean: Per-channel mean. Nested inputs such as ``[[0.5, 0.5, 0.5]]``
            are flattened to one dimension before use.
        std: Per-channel standard deviation, flattened the same way.
        data_format: Accepted for signature compatibility only; this
            implementation does not use it, so the output keeps the
            layout of the input.
        input_data_format: Channel layout of ``image``. ``None`` or
            ``ChannelDimension.LAST`` treats the last axis as the channel
            axis; any other value is handled as channels-first.
        **kwargs: Ignored.

    Returns:
        A NumPy array with the same shape as ``image``.

    NOTE(review): the ``self.normalize(...)`` call visible in this change
    lives in ``MiniCPMVImageProcessor`` while this method is defined on
    ``MiniCPMVProcessor`` -- confirm this override is actually reached.
    """
    image = np.asarray(image)
    # Flatten so that nested statistics become plain per-channel vectors.
    mean = np.asarray(mean).flatten()
    std = np.asarray(std).flatten()

    # Channels-last: the statistics broadcast against the trailing axis as-is.
    if input_data_format is None or input_data_format == ChannelDimension.LAST:
        return (image - mean) / std

    # Channels-first: reshape the statistics so they broadcast along the
    # channel axis. Transposing the whole array (the previous approach) is
    # only correct for a single (C, H, W) image; for a batched
    # (B, C, H, W) input it would line the statistics up with the batch axis.
    # Assumes 4-D input is (batch, channels, height, width) -- TODO confirm.
    channel_axis = 1 if image.ndim == 4 else 0
    stats_shape = [1] * max(image.ndim, 1)
    stats_shape[channel_axis] = -1
    return (image - mean.reshape(stats_shape)) / std.reshape(stats_shape)
64
+
65
 
66
  def __call__(
67
  self,