Kwai-Keye
/

Keye-VL-8B-Preview

Video-Text-to-Text

feature-extraction

Model card | Files and versions

Kwai-Keye committed on Jun 27

Commit

a964671

·

verified ·

1 Parent(s): a88b27e

fix preprocessor

Files changed (2) hide show

image_processing_keye.py +2 -4
preprocessor_config.json +3 -3

image_processing_keye.py CHANGED Viewed

@@ -129,7 +129,7 @@ def smart_resize(
     width: int,
     factor: int = 28,
     min_pixels: int = 28 * 28 * 130,
-    max_pixels: int = 28 * 28 * 1670,
 ):
     """Rescales the image so that the following conditions are met:
@@ -223,7 +223,7 @@ class SiglipImageProcessor(BaseImageProcessor):
         image_std: Optional[Union[float, List[float]]] = None,
         do_convert_rgb: bool = True,
         min_pixels: int = 28 * 28 * 130,
-        max_pixels: int = 28 * 28 * 1670,
         patch_size: int = 14,
         temporal_patch_size: int = 1,
         merge_size: int = 2,
@@ -357,7 +357,6 @@ class SiglipImageProcessor(BaseImageProcessor):
         processed_images = []
         for image in images:
-            # image = self.mvit_rescale(image, merge_size=self.merge_size)
             if do_resize:
                 resized_height, resized_width = smart_resize(
                     height,
@@ -385,7 +384,6 @@ class SiglipImageProcessor(BaseImageProcessor):
                     std=image_std,
                     input_data_format=input_data_format,
                 )
             image = to_channel_dimension_format(
                 image, data_format, input_channel_dim=input_data_format
             )

     width: int,
     factor: int = 28,
     min_pixels: int = 28 * 28 * 130,
+    max_pixels: int = 28 * 28 * 1280,
 ):
     """Rescales the image so that the following conditions are met:
         image_std: Optional[Union[float, List[float]]] = None,
         do_convert_rgb: bool = True,
         min_pixels: int = 28 * 28 * 130,
+        max_pixels: int = 28 * 28 * 1280,
         patch_size: int = 14,
         temporal_patch_size: int = 1,
         merge_size: int = 2,
         processed_images = []
         for image in images:
             if do_resize:
                 resized_height, resized_width = smart_resize(
                     height,
                     std=image_std,
                     input_data_format=input_data_format,
                 )
             image = to_channel_dimension_format(
                 image, data_format, input_channel_dim=input_data_format
             )

preprocessor_config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
-  "min_pixels": 101920,
-  "max_pixels": 1309280,
   "patch_size": 14,
   "temporal_patch_size": 1,
   "merge_size": 2,
@@ -15,4 +15,4 @@
     "AutoProcessor": "processing_keye.KeyeProcessor",
     "AutoImageProcessor": "image_processing_keye.SiglipImageProcessor"
   }
-}

 {
+  "min_pixels": 102400,
+  "max_pixels": 1003520,
   "patch_size": 14,
   "temporal_patch_size": 1,
   "merge_size": 2,
     "AutoProcessor": "processing_keye.KeyeProcessor",
     "AutoImageProcessor": "image_processing_keye.SiglipImageProcessor"
   }
+}