update
- bubogpt/models/mm_gpt4.py +4 -0
- grounding_model.py +2 -0
- tagging_model.py +2 -0
bubogpt/models/mm_gpt4.py
CHANGED
@@ -78,10 +78,12 @@ class MMGPT4(BaseModel):
 
         self.low_resource = low_resource
 
+        import gc
         print('Loading ImageBind')
         self.multimodal_encoder = imagebind_huge(pretrained=True, freeze_imagebind=freeze_imagebind,
                                                  with_head=with_bind_head, use_blip_vision=use_blip_vision)
         print('Loading ImageBind Done')
+        gc.collect()
 
         print(f'Loading LLAMA from {llama_model}')
         self.llama_tokenizer = LlamaTokenizer.from_pretrained('magicr/vicuna-7b', use_fast=False, use_auth_token=True)
@@ -94,12 +96,14 @@ class MMGPT4(BaseModel):
         for name, param in self.llama_model.named_parameters():
             param.requires_grad = False
         print('Loading LLAMA Done')
+        gc.collect()
 
         print('Loading Q-Former and Adapter/Projector')
         self.multimodal_joiner = ImageBindJoiner(joiner_cfg, output_dim=self.llama_model.config.hidden_size)
         if use_blip_vision:
             replace_joiner_vision(self.multimodal_joiner, q_former_model, proj_model)
         print('Loading Q-Former and Adapter/Projector Done')
+        gc.collect()
 
         self.max_txt_len = max_txt_len
         self.end_sym = end_sym
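The commit applies one pattern throughout: call gc.collect() immediately after each heavy loading stage, so temporaries left over from one stage are reclaimed before the next stage allocates its weights. A minimal runnable sketch of that pattern, with bytearray buffers standing in for the real loaders (imagebind_huge, the LLAMA loader, ImageBindJoiner), which are not reproduced here:

import gc

def load_encoder():
    # Stand-in for imagebind_huge(...); a real loader also creates
    # short-lived temporaries (state dicts, buffers) along the way.
    return bytearray(100 * 1024 * 1024)

def load_llm():
    # Stand-in for the LLAMA / LlamaTokenizer loading stage.
    return bytearray(100 * 1024 * 1024)

models = []
for load_stage in (load_encoder, load_llm):
    models.append(load_stage())
    # Reclaim unreachable temporaries (including reference cycles, which
    # plain reference counting misses) before the next stage allocates.
    gc.collect()

On a GPU-backed Space, torch.cuda.empty_cache() after moving each model to the device would play a similar role for the CUDA caching allocator, though the commit itself only adds the host-side collections.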
grounding_model.py
CHANGED
@@ -17,11 +17,13 @@ from groundingdino.util.utils import clean_state_dict
 
 
 def load_groundingdino_model(model_config_path, model_checkpoint_path):
+    import gc
     args = CN.load_cfg(open(model_config_path, "r"))
     model = build_groundingdino(args)
     checkpoint = torch.load(model_checkpoint_path, map_location="cpu")
     load_res = model.load_state_dict(clean_state_dict(checkpoint["model"]), strict=False)
     print('loading GroundingDINO:', load_res)
+    gc.collect()
     _ = model.eval()
     return model
 
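One caveat: gc.collect() only reclaims objects that are no longer reachable. At the point where it runs in load_groundingdino_model, the checkpoint dict — a full CPU copy of the weights — is still bound to a local name, so it survives the collection. A possible variant, not part of this commit, that drops the reference first (reusing the names already imported in grounding_model.py: CN, build_groundingdino, clean_state_dict):

def load_groundingdino_model(model_config_path, model_checkpoint_path):
    import gc
    args = CN.load_cfg(open(model_config_path, "r"))
    model = build_groundingdino(args)
    checkpoint = torch.load(model_checkpoint_path, map_location="cpu")
    load_res = model.load_state_dict(clean_state_dict(checkpoint["model"]), strict=False)
    print('loading GroundingDINO:', load_res)
    # load_state_dict copies values into the model's own parameters, so the
    # checkpoint tensors are redundant afterwards. Deleting the last
    # reference lets refcounting free them; gc.collect() then sweeps any
    # remaining cycles.
    del checkpoint
    gc.collect()
    _ = model.eval()
    return model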
tagging_model.py
CHANGED
@@ -8,6 +8,7 @@ from ram.models import ram
 class TaggingModule(nn.Module):
     def __init__(self, device='cpu'):
         super().__init__()
+        import gc
         self.device = device
         image_size = 384
         self.transform = transforms.Compose([
@@ -23,6 +24,7 @@ class TaggingModule(nn.Module):
             vit='swin_l'
         ).eval().to(device)
         print('==> Tagging Module Loaded.')
+        gc.collect()
 
     @torch.no_grad()
     def forward(self, original_image):
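To confirm the added collections actually lower the Space's peak footprint, resident memory can be sampled around each loading stage. A small self-contained check (assumes psutil is installed; it is not a dependency of this repo):

import gc
import os

import psutil  # assumed available: pip install psutil

def rss_mb():
    # Resident set size of the current process, in MiB.
    return psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

print(f'baseline: {rss_mb():.0f} MiB')
blob = bytearray(200 * 1024 * 1024)  # stand-in for loading one model stage
print(f'loaded:   {rss_mb():.0f} MiB')
# Drop the only reference (refcounting frees it), then sweep any cycles.
del blob
gc.collect()
print(f'after gc: {rss_mb():.0f} MiB')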