Kian Kyars committed
Commit bc311dd · Parent: 887133f

Fix model loading issues - pin transformers/torch versions, add trust_remote_code

Files changed (1): app.py (+12 -3)
app.py CHANGED
@@ -15,10 +15,12 @@ model_image = (
     .apt_install("git")
     .pip_install(
         [
+            "transformers==4.44.2",
+            "torch==2.4.1",
+            "torchvision==0.19.1",
             "git+https://github.com/illuin-tech/colpali.git@782edcd50108d1842d154730ad3ce72476a2d17d",
             "hf_transfer==0.1.8",
             "qwen-vl-utils==0.0.8",
-            "torchvision==0.19.1",
         ]
     )
     .env({"HF_HUB_ENABLE_HF_TRANSFER": "1", "HF_HUB_CACHE": CACHE_DIR})
@@ -61,17 +63,21 @@ def download_model():
 
 @app.cls(
     image=model_image,
-    gpu="A100-80GB",
+    gpu="B200",
     scaledown_window=10 * MINUTES,
     volumes={"/vol/pdfs/": pdf_volume, CACHE_DIR: cache_volume},
 )
 class Model:
     @modal.enter()
     def load_models(self):
+        import os
+        os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
         self.colqwen2_model = ColQwen2.from_pretrained(
             "vidore/colqwen2-v0.1",
             torch_dtype=torch.bfloat16,
             device_map="cuda:0",
+            trust_remote_code=True,
         )
         self.colqwen2_processor = ColQwen2Processor.from_pretrained(
             "vidore/colqwen2-v0.1"
@@ -80,10 +86,13 @@ class Model:
             MODEL_NAME,
             revision=MODEL_REVISION,
             torch_dtype=torch.bfloat16,
+            trust_remote_code=True,
         )
         self.qwen2_vl_model.to("cuda:0")
         self.qwen2_vl_processor = AutoProcessor.from_pretrained(
-            "Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True
+            MODEL_NAME,
+            revision=MODEL_REVISION,
+            trust_remote_code=True
         )
 
     @modal.method()
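
Not part of this commit, but a minimal sanity-check sketch (run inside the rebuilt image, e.g. in a temporary @modal.method) can confirm that the pinned versions above actually resolved and that bfloat16 CUDA inference is available before loading the models:

# Sanity-check sketch, not in this commit: verify the pinned package
# versions from model_image and that the GPU supports bfloat16.
import torch
import torchvision
import transformers

assert transformers.__version__ == "4.44.2", transformers.__version__
assert torch.__version__.startswith("2.4.1"), torch.__version__
assert torchvision.__version__.startswith("0.19.1"), torchvision.__version__
assert torch.cuda.is_available() and torch.cuda.is_bf16_supported()
print("pinned versions OK:", transformers.__version__, torch.__version__)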