llamaindex
/

vdr-2b-multi-v1

sentence-transformers

text-generation-inference

Model card Files and versions

cheesyFishes committed on Jan 9

Commit

1df3a64

·

verified ·

1 Parent(s): 6a23f44

update handling of init args

Files changed (1) hide show

custom_st.py +16 -5

custom_st.py CHANGED Viewed

@@ -22,6 +22,8 @@ class Transformer(nn.Module):
         min_pixels: int = 1 * 28 * 28,
         dimension: int = 2048,
         max_seq_length: Optional[int] = None,
         cache_dir: Optional[str] = None,
         device: str = 'cuda:0',
         **kwargs,
@@ -34,6 +36,17 @@ class Transformer(nn.Module):
         self.min_pixels = min_pixels
         self.max_seq_length = max_seq_length
         # Initialize model
         try:
             self.model = Qwen2VLForConditionalGeneration.from_pretrained(
@@ -42,7 +55,7 @@ class Transformer(nn.Module):
                 torch_dtype=torch.bfloat16,
                 device_map=device,
                 cache_dir=cache_dir,
-                **kwargs
             ).eval()
         except (ImportError, ValueError) as e:
             print(f"Flash attention not available, falling back to default attention: {e}")
@@ -51,15 +64,13 @@ class Transformer(nn.Module):
                 torch_dtype=torch.bfloat16,
                 device_map=device,
                 cache_dir=cache_dir,
-                **kwargs
             ).eval()
         # Initialize processor
         self.processor = AutoProcessor.from_pretrained(
             processor_name_or_path or model_name_or_path,
-            min_pixels=min_pixels,
-            max_pixels=max_pixels,
-            cache_dir=cache_dir
         )
         # Set padding sides

         min_pixels: int = 1 * 28 * 28,
         dimension: int = 2048,
         max_seq_length: Optional[int] = None,
+        model_args: Optional[Dict[str, Any]] = None,
+        processor_args: Optional[Dict[str, Any]] = None,
         cache_dir: Optional[str] = None,
         device: str = 'cuda:0',
         **kwargs,
         self.min_pixels = min_pixels
         self.max_seq_length = max_seq_length
+        # Handle args
+        model_kwargs = model_args or {}
+        model_kwargs.update(kwargs)
+        processor_kwargs = processor_args or {}
+        processor_kwargs.update({
+            'min_pixels': min_pixels,
+            'max_pixels': max_pixels,
+            'cache_dir': cache_dir
+        })
         # Initialize model
         try:
             self.model = Qwen2VLForConditionalGeneration.from_pretrained(
                 torch_dtype=torch.bfloat16,
                 device_map=device,
                 cache_dir=cache_dir,
+                **model_kwargs
             ).eval()
         except (ImportError, ValueError) as e:
             print(f"Flash attention not available, falling back to default attention: {e}")
                 torch_dtype=torch.bfloat16,
                 device_map=device,
                 cache_dir=cache_dir,
+                **model_kwargs
             ).eval()
         # Initialize processor
         self.processor = AutoProcessor.from_pretrained(
             processor_name_or_path or model_name_or_path,
+            **processor_kwargs
         )
         # Set padding sides