support-max-pixels (#6)
feat: configure max pixels (509c5332bacf540daad3c8fcead31c9abf2e2452)

Files changed:
- README.md (+2 -2)
- modeling_jina_embeddings_v4.py (+40 -0)
README.md
CHANGED

@@ -24,8 +24,8 @@ images = [Image.open(path) for path in image_paths]
 # Example 1: Text matching task with single vector embeddings
 model.set_task(task='text-matching')
 
-# Generate embeddings with dimension truncation (256)
-img_embeddings = model.encode_images(images=images, truncate_dim=256)
+# Generate embeddings with dimension truncation (256), decrease max_pixels
+img_embeddings = model.encode_images(images=images, truncate_dim=256, max_pixels=602112)
 text_embeddings = model.encode_texts(texts=texts, truncate_dim=256, max_length=512)
 
 # Example 2: Retrieval task with multi-vector embeddings
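For context, here is a minimal end-to-end sketch of the updated call. This is an illustration rather than part of the diff: the repo id and loading code follow the usual README pattern, and the image paths are hypothetical. The value 602112 equals 768 × 28 × 28, i.e. roughly 768 patches of 28×28 pixels for a Qwen2.5-VL-style image processor, so lowering max_pixels reduces the number of visual tokens (and memory) spent per image.

from PIL import Image
from transformers import AutoModel

# Load the model as in the README (repo id assumed; adjust to your setup)
model = AutoModel.from_pretrained("jinaai/jina-embeddings-v4", trust_remote_code=True)
model.set_task(task='text-matching')

image_paths = ["page1.png", "page2.png"]  # hypothetical paths
images = [Image.open(path) for path in image_paths]

# Truncate embeddings to 256 dims and cap each image at ~602k pixels (768 * 28 * 28)
img_embeddings = model.encode_images(images=images, truncate_dim=256, max_pixels=602112)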
modeling_jina_embeddings_v4.py
CHANGED

@@ -374,6 +374,21 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         truncate_dim: Optional[int] = None,
         text_type: Optional[str] = None,
     ) -> List[torch.Tensor]:
+        """
+        Encodes a list of texts into embeddings.
+
+        Args:
+            texts: List of text strings to encode
+            max_length: Maximum token length for text processing
+            batch_size: Number of texts to process at once
+            vector_type: Type of embedding vector to generate ('single_vector' or 'multi_vector')
+            return_numpy: Whether to return numpy arrays instead of torch tensors
+            truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
+            text_type: Type of text being encoded ('query' or 'passage')
+
+        Returns:
+            List of text embeddings as tensors or numpy arrays
+        """
         text_type = text_type or "query"
         encode_kwargs = self._validate_encoding_params(
             vector_type, truncate_dim, text_type

@@ -404,7 +419,26 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         vector_type: Optional[str] = None,
         return_numpy: bool = False,
         truncate_dim: Optional[int] = None,
+        max_pixels: Optional[int] = None,
     ) -> List[torch.Tensor]:
+        """
+        Encodes a list of images into embeddings.
+
+        Args:
+            images: List of PIL images to encode
+            batch_size: Number of images to process at once
+            vector_type: Type of embedding vector to generate ('single_vector' or 'multi_vector')
+            return_numpy: Whether to return numpy arrays instead of torch tensors
+            truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
+            max_pixels: Maximum number of pixels to process per image
+
+        Returns:
+            List of image embeddings as tensors or numpy arrays
+        """
+        if max_pixels:
+            default_max_pixels = self.processor.image_processor.max_pixels
+            self.processor.image_processor.max_pixels = max_pixels  # change during encoding
+
         encode_kwargs = self._validate_encoding_params(vector_type, truncate_dim)
 
         is_single = len(images) == 1

@@ -417,6 +451,9 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             **encode_kwargs,
         )
 
+        if max_pixels:
+            self.processor.image_processor.max_pixels = default_max_pixels
+
         return embeddings[0] if is_single else embeddings
 
     @classmethod

@@ -426,6 +463,9 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         *args,
         **kwargs,
     ):
+        """
+        Loads a pretrained model and configures it with the appropriate task adapter (`retrieval` by default).
+        """
         if "torch_dtype" not in kwargs:
             kwargs["torch_dtype"] = "auto"
 
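A note on the override pattern above: encode_images swaps self.processor.image_processor.max_pixels for the duration of the call and restores it afterwards, but if the encoding step raises, the default value is never put back (and max_pixels=0 would be silently ignored by the truthiness check). A minimal sketch of an alternative, assuming only that the image processor exposes a mutable max_pixels attribute; the helper name is hypothetical and not part of this commit:

from contextlib import contextmanager
from typing import Optional

@contextmanager
def temporary_max_pixels(image_processor, max_pixels: Optional[int]):
    """Temporarily override the processor's max_pixels, restoring it even on error."""
    if max_pixels is None:
        yield
        return
    previous = image_processor.max_pixels
    image_processor.max_pixels = max_pixels
    try:
        yield
    finally:
        image_processor.max_pixels = previous

# Inside encode_images, the existing encoding call could then be wrapped as:
#     with temporary_max_pixels(self.processor.image_processor, max_pixels):
#         embeddings = ...  # the existing batch-encoding call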