jupyterjazz committed on
Commit
4560851
·
verified ·
1 Parent(s): e8ce774

Reduce Console Logging (#45)

Browse files

- feat: reduced default noise of the model (cf456d3eb4b965685af2a435dc88510ed005d762)
- feat: default verbosity 1 (191d83caa0a8f06c2c1c8ca849df21bdf8857c4c)

configuration_jina_embeddings_v4.py CHANGED
@@ -2,6 +2,7 @@ from transformers.models.qwen2_5_vl import Qwen2_5_VLConfig
2
 
3
  from typing import Optional
4
 
 
5
  class JinaEmbeddingsV4Config(Qwen2_5_VLConfig):
6
  """
7
  Configuration for the JinaEmbeddingsV4 model.
@@ -12,10 +13,11 @@ class JinaEmbeddingsV4Config(Qwen2_5_VLConfig):
12
  single_vector_pool_strategy: str = "mean",
13
  multi_vector_projector_dim: int = 128,
14
  pretrained_peft_model_name_or_path: Optional[str] = None,
 
15
  **kwargs,
16
  ):
17
  super().__init__(**kwargs)
18
  self.single_vector_pool_strategy = single_vector_pool_strategy
19
  self.multi_vector_projector_dim = multi_vector_projector_dim
20
  self.pretrained_peft_model_name_or_path = pretrained_peft_model_name_or_path
21
-
 
2
 
3
  from typing import Optional
4
 
5
+
6
  class JinaEmbeddingsV4Config(Qwen2_5_VLConfig):
7
  """
8
  Configuration for the JinaEmbeddingsV4 model.
 
13
  single_vector_pool_strategy: str = "mean",
14
  multi_vector_projector_dim: int = 128,
15
  pretrained_peft_model_name_or_path: Optional[str] = None,
16
+ verbosity: int = 1,
17
  **kwargs,
18
  ):
19
  super().__init__(**kwargs)
20
  self.single_vector_pool_strategy = single_vector_pool_strategy
21
  self.multi_vector_projector_dim = multi_vector_projector_dim
22
  self.pretrained_peft_model_name_or_path = pretrained_peft_model_name_or_path
23
+ self.verbosity = verbosity
modeling_jina_embeddings_v4.py CHANGED
@@ -146,6 +146,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
146
  self.name_or_path, trust_remote_code=True, use_fast=True
147
  )
148
  self.multi_vector_projector_dim = config.multi_vector_projector_dim
 
149
  self._task = None
150
 
151
  @property
@@ -336,7 +337,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
336
  ), "`return_numpy` is not supported when `return_multivector=True` and more than one data is encoded"
337
  results = []
338
  self.eval()
339
- for batch in tqdm(dataloader, desc=desc):
340
  with torch.no_grad():
341
  batch = {k: v.to(self.device) for k, v in batch.items()}
342
  with torch.autocast(
 
146
  self.name_or_path, trust_remote_code=True, use_fast=True
147
  )
148
  self.multi_vector_projector_dim = config.multi_vector_projector_dim
149
+ self.verbosity = config.verbosity
150
  self._task = None
151
 
152
  @property
 
337
  ), "`return_numpy` is not supported when `return_multivector=True` and more than one data is encoded"
338
  results = []
339
  self.eval()
340
+ for batch in tqdm(dataloader, desc=desc, disable=self.verbosity == 0):
341
  with torch.no_grad():
342
  batch = {k: v.to(self.device) for k, v in batch.items()}
343
  with torch.autocast(