Reduce Console Logging (#45)
Browse files
- feat: reduced default noise of the model (cf456d3eb4b965685af2a435dc88510ed005d762)
- feat: default verbosity 1 (191d83caa0a8f06c2c1c8ca849df21bdf8857c4c)
configuration_jina_embeddings_v4.py
CHANGED
@@ -2,6 +2,7 @@ from transformers.models.qwen2_5_vl import Qwen2_5_VLConfig
|
|
2 |
|
3 |
from typing import Optional
|
4 |
|
|
|
5 |
class JinaEmbeddingsV4Config(Qwen2_5_VLConfig):
|
6 |
"""
|
7 |
Configuration for the JinaEmbeddingsV4 model.
|
@@ -12,10 +13,11 @@ class JinaEmbeddingsV4Config(Qwen2_5_VLConfig):
|
|
12 |
single_vector_pool_strategy: str = "mean",
|
13 |
multi_vector_projector_dim: int = 128,
|
14 |
pretrained_peft_model_name_or_path: Optional[str] = None,
|
|
|
15 |
**kwargs,
|
16 |
):
|
17 |
super().__init__(**kwargs)
|
18 |
self.single_vector_pool_strategy = single_vector_pool_strategy
|
19 |
self.multi_vector_projector_dim = multi_vector_projector_dim
|
20 |
self.pretrained_peft_model_name_or_path = pretrained_peft_model_name_or_path
|
21 |
-
|
|
|
2 |
|
3 |
from typing import Optional
|
4 |
|
5 |
+
|
6 |
class JinaEmbeddingsV4Config(Qwen2_5_VLConfig):
|
7 |
"""
|
8 |
Configuration for the JinaEmbeddingsV4 model.
|
|
|
13 |
single_vector_pool_strategy: str = "mean",
|
14 |
multi_vector_projector_dim: int = 128,
|
15 |
pretrained_peft_model_name_or_path: Optional[str] = None,
|
16 |
+
verbosity: int = 1,
|
17 |
**kwargs,
|
18 |
):
|
19 |
super().__init__(**kwargs)
|
20 |
self.single_vector_pool_strategy = single_vector_pool_strategy
|
21 |
self.multi_vector_projector_dim = multi_vector_projector_dim
|
22 |
self.pretrained_peft_model_name_or_path = pretrained_peft_model_name_or_path
|
23 |
+
self.verbosity = verbosity
|
modeling_jina_embeddings_v4.py
CHANGED
@@ -146,6 +146,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
|
|
146 |
self.name_or_path, trust_remote_code=True, use_fast=True
|
147 |
)
|
148 |
self.multi_vector_projector_dim = config.multi_vector_projector_dim
|
|
|
149 |
self._task = None
|
150 |
|
151 |
@property
|
@@ -336,7 +337,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
|
|
336 |
), "`return_numpy` is not supported when `return_multivector=True` and more than one data is encoded"
|
337 |
results = []
|
338 |
self.eval()
|
339 |
-
for batch in tqdm(dataloader, desc=desc):
|
340 |
with torch.no_grad():
|
341 |
batch = {k: v.to(self.device) for k, v in batch.items()}
|
342 |
with torch.autocast(
|
|
|
146 |
self.name_or_path, trust_remote_code=True, use_fast=True
|
147 |
)
|
148 |
self.multi_vector_projector_dim = config.multi_vector_projector_dim
|
149 |
+
self.verbosity = config.verbosity
|
150 |
self._task = None
|
151 |
|
152 |
@property
|
|
|
337 |
), "`return_numpy` is not supported when `return_multivector=True` and more than one data is encoded"
|
338 |
results = []
|
339 |
self.eval()
|
340 |
+
for batch in tqdm(dataloader, desc=desc, disable=self.verbosity == 0):
|
341 |
with torch.no_grad():
|
342 |
batch = {k: v.to(self.device) for k, v in batch.items()}
|
343 |
with torch.autocast(
|