GoofyLM
/

N2.1-Eye-1.3B

@@ -167,21 +167,46 @@ class MultimodalLFM2Model(PreTrainedModel):
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
         """
         Build the model from a flat checkpoint directory.

         The config is read with ``cls.config_class.from_pretrained`` and the
         model is constructed as ``cls(config)``. Two optional weight files are
         then looked up directly inside the directory:

         * ``language_model.bin``   -> ``model.language_model``
         * ``vision_projection.bin`` -> ``model.vision_projection``

         Args:
             pretrained_model_name_or_path: Directory containing the config and
                 the weight files. Weight files are located with
                 ``os.path.join`` / ``os.path.exists``, i.e. on the local
                 filesystem only.
             **kwargs: Accepted for signature compatibility; not used.

         Returns:
             The constructed model. A component whose weight file is missing
             keeps the parameters ``cls(config)`` gave it, and a
             ``RuntimeWarning`` is issued so that this is not silent.
         """
         import warnings

         config = cls.config_class.from_pretrained(pretrained_model_name_or_path)
         model = cls(config)
         # NOTE(security): torch.load unpickles the file, so only point this at
         # checkpoints that come from a trusted source.
         for component in ("language_model", "vision_projection"):
             weights_path = os.path.join(pretrained_model_name_or_path, component + ".bin")
             if not os.path.exists(weights_path):
                 # Loading stays best-effort, but a missing file is reported
                 # instead of quietly handing back untrained weights.
                 warnings.warn(
                     f"No weights found for '{component}' at {weights_path!r}; "
                     "it keeps its freshly initialised parameters.",
                     RuntimeWarning,
                     stacklevel=2,
                 )
                 continue
             state_dict = torch.load(weights_path, map_location="cpu")
             # Resolve the submodule only when there is something to load.
             getattr(model, component).load_state_dict(state_dict)
         return model

     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
         """
         Build the model from a local checkpoint directory.

         The config is read with ``cls.config_class.from_pretrained`` and the
         model is constructed as ``cls(config)``. Weights are then looked up in
         this order:

         1. ``pytorch_model.bin`` - one state dict for the whole model. Keys
            starting with ``language_model.`` or ``vision_projection.`` have
            that prefix stripped and are loaded into the matching submodule.
            Keys with any other prefix are ignored.
         2. If that file does not exist: ``language_model.bin`` and
            ``vision_projection.bin``, each loaded into its submodule when the
            file is present.

         Args:
             pretrained_model_name_or_path: Directory containing the config and
                 the weight files. Weight files are located with
                 ``os.path.join`` / ``os.path.exists``, i.e. on the local
                 filesystem only.
             **kwargs: Accepted for signature compatibility; not used.

         Returns:
             The constructed model. A submodule for which no weights were found
             keeps the parameters ``cls(config)`` gave it, and a
             ``RuntimeWarning`` is issued so that this is not silent.
         """
         import warnings

         config = cls.config_class.from_pretrained(pretrained_model_name_or_path)
         model = cls(config)
         # NOTE(security): torch.load unpickles the file, so only point this at
         # checkpoints that come from a trusted source.
         combined_path = os.path.join(pretrained_model_name_or_path, "pytorch_model.bin")
         full_state_dict = None
         if os.path.exists(combined_path):
             full_state_dict = torch.load(combined_path, map_location="cpu")
         for component in ("language_model", "vision_projection"):
             if full_state_dict is not None:
                 # Combined checkpoint: keep this component's keys, minus the
                 # "<component>." prefix the submodule itself does not expect.
                 prefix = component + "."
                 state_dict = {
                     key[len(prefix):]: value
                     for key, value in full_state_dict.items()
                     if key.startswith(prefix)
                 } or None
             else:
                 # Fallback: one weight file per component.
                 part_path = os.path.join(pretrained_model_name_or_path, component + ".bin")
                 state_dict = None
                 if os.path.exists(part_path):
                     state_dict = torch.load(part_path, map_location="cpu")
             if state_dict is None:
                 # Loading stays best-effort, but missing weights are reported
                 # instead of quietly handing back an untrained submodule.
                 warnings.warn(
                     f"No weights found for '{component}' in "
                     f"{pretrained_model_name_or_path!r}; "
                     "it keeps its freshly initialised parameters.",
                     RuntimeWarning,
                     stacklevel=2,
                 )
                 continue
             # Resolve the submodule only when there is something to load.
             getattr(model, component).load_state_dict(state_dict)
         return model