add norm method
Browse files- image_processing_minicpmv.py +2 -2
- processing_minicpmv.py +14 -1
image_processing_minicpmv.py
CHANGED
@@ -395,8 +395,8 @@ class MiniCPMVImageProcessor(BaseImageProcessor):
|
|
395 |
image_patches = [
|
396 |
self.normalize(
|
397 |
image=image,
|
398 |
-
mean=
|
399 |
-
std=
|
400 |
input_data_format=input_data_format
|
401 |
)
|
402 |
for image in image_patches
|
|
|
395 |
image_patches = [
|
396 |
self.normalize(
|
397 |
image=image,
|
398 |
+
mean=self.mean,
|
399 |
+
std=self.std,
|
400 |
input_data_format=input_data_format
|
401 |
)
|
402 |
for image in image_patches
|
processing_minicpmv.py
CHANGED
@@ -25,9 +25,11 @@ from transformers.image_utils import ImageInput
|
|
25 |
from transformers.processing_utils import ProcessorMixin
|
26 |
from transformers.tokenization_utils_base import PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
|
27 |
from transformers.utils import TensorType, requires_backends, is_torch_dtype, is_torch_device
|
|
|
28 |
|
29 |
-
from .image_processing_minicpmv import MiniCPMVBatchFeature
|
30 |
|
|
|
|
|
31 |
|
32 |
class MiniCPMVProcessor(ProcessorMixin):
|
33 |
r"""
|
@@ -49,6 +51,17 @@ class MiniCPMVProcessor(ProcessorMixin):
|
|
49 |
def __init__(self, image_processor=None, tokenizer=None):
|
50 |
super().__init__(image_processor, tokenizer)
|
51 |
self.version = image_processor.version
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
def __call__(
|
54 |
self,
|
|
|
25 |
from transformers.processing_utils import ProcessorMixin
|
26 |
from transformers.tokenization_utils_base import PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
|
27 |
from transformers.utils import TensorType, requires_backends, is_torch_dtype, is_torch_device
|
28 |
+
from transformers.image_utils import ChannelDimension
|
29 |
|
|
|
30 |
|
31 |
+
from .image_processing_minicpmv import MiniCPMVBatchFeature
|
32 |
+
import numpy as np
|
33 |
|
34 |
class MiniCPMVProcessor(ProcessorMixin):
|
35 |
r"""
|
|
|
51 |
def __init__(self, image_processor=None, tokenizer=None):
    """
    Build the processor from an image processor and a tokenizer.

    Both components are forwarded to the parent initializer, then the image
    processor's `version` attribute is copied onto this processor.

    Args:
        image_processor: Image processor instance; its `version` attribute is read here.
        tokenizer: Tokenizer instance, passed through to the parent initializer.
    """
    super().__init__(image_processor, tokenizer)
    # NOTE(review): `image_processor` defaults to None, but `.version` is read
    # unconditionally, so a real image processor is effectively required.
    self.version = image_processor.version
|
54 |
+
|
55 |
+
def normalize(self, image, mean, std, data_format=None, input_data_format=None, **kwargs):
    """
    Normalize `image` channel-wise as `(image - mean) / std` using NumPy.

    Args:
        image: Array-like pixel data whose channel axis is either the last
            axis (e.g. `(height, width, channels)`) or the first axis
            (e.g. `(channels, height, width)`).
        mean: Per-channel mean. Any nesting is flattened to 1D, so
            `[[0.5, 0.5, 0.5]]` is treated as `[0.5, 0.5, 0.5]`.
        std: Per-channel standard deviation, flattened the same way as `mean`.
        data_format: Currently unused; the result keeps the layout of `image`.
        input_data_format: Channel layout of `image`. `ChannelDimension.LAST`
            selects the last axis; any other non-None value is treated as
            channels-first. When None, the layout is guessed from the shape:
            channels-last unless only the first axis matches the number of
            channel statistics.
        **kwargs: Ignored.

    Returns:
        `np.ndarray`: The normalized image, in the same layout as the input.
    """
    # Convert up front so array-likes without `.T` (nested lists, etc.) behave
    # the same in both layout branches.
    pixels = np.asarray(image)
    # Force mean and std to be 1D arrays.
    mean = np.asarray(mean).flatten()
    std = np.asarray(std).flatten()

    if input_data_format is None:
        # No layout given: keep the historical channels-last assumption
        # whenever it can broadcast, and fall back to channels-first only when
        # the first axis (and not the last) matches the channel count.
        num_channels = max(mean.size, std.size)
        channels_first = (
            num_channels > 1
            and pixels.ndim >= 2
            and pixels.shape[-1] != num_channels
            and pixels.shape[0] == num_channels
        )
    else:
        channels_first = input_data_format != ChannelDimension.LAST

    if channels_first:
        # Transposing moves the channel axis last so the 1D statistics
        # broadcast; the second transpose restores the original layout.
        return ((pixels.T - mean) / std).T
    return (pixels - mean) / std
|
64 |
+
|
65 |
|
66 |
def __call__(
|
67 |
self,
|