Upload model

Browse files

Files changed (4) hide show

adaptor_generic.py +29 -0
adaptor_mlp.py +150 -0
adaptor_registry.py +37 -0
hf_model.py +5 -1

adaptor_generic.py ADDED Viewed

	@@ -0,0 +1,29 @@

+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+from argparse import Namespace
+import torch
+from torch import nn
+import torch.nn.functional as F
+from .adaptor_base import AdaptorBase, AdaptorInput, RadioOutput
+from .adaptor_mlp import create_mlp_from_state
+class GenericAdaptor(AdaptorBase):
+    def __init__(self, main_config: Namespace, adaptor_config, state):
+        super().__init__()
+        self.head_mlp = create_mlp_from_state(main_config.mlp_version, state, 'summary.')
+        self.feat_mlp = create_mlp_from_state(main_config.mlp_version, state, 'feature.')
+    def forward(self, input: AdaptorInput) -> RadioOutput:
+        summary = self.head_mlp(input.summary)
+        feat = self.feat_mlp(input.features)
+        return RadioOutput(summary, feat)

adaptor_mlp.py ADDED Viewed

	@@ -0,0 +1,150 @@

+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+import math
+from typing import Dict
+import torch
+from torch import nn
+from einops import rearrange
+from timm.models.vision_transformer import Block
+class MLP(nn.Module):
+    def __init__(self, input_size: int, hidden_size: int, output_size: int,
+                 num_inner: int = 0, device: torch.device = None, **kwargs):
+        super(MLP, self).__init__()
+        self.fc1 = nn.Linear(input_size, hidden_size, device=device)
+        self.norm = nn.LayerNorm(hidden_size, device=device)
+        self.relu = nn.ReLU()
+        inner = []
+        for _ in range(num_inner):
+            inner.extend([
+                nn.Linear(hidden_size, hidden_size, device=device),
+                nn.LayerNorm(hidden_size, device=device),
+                nn.ReLU(),
+            ])
+        if inner:
+            self.inner = nn.Sequential(*inner)
+        else:
+            self.inner = nn.Identity()
+        self.fc2 = nn.Linear(hidden_size, output_size, device=device)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.fc1(x)
+        x = self.norm(x)
+        x = self.relu(x)
+        x = self.inner(x)
+        x = self.fc2(x)
+        return x
+class MLP2(nn.Module):
+    def __init__(self, input_size: int, hidden_size: int, output_size: int,
+                 num_inner: int = 0,
+                 pre_norm: bool = False, device: torch.device = None,
+                 upsample_factor: int = 1,
+                 **kwargs):
+        super().__init__()
+        self.pre_norm = nn.Sequential(
+            nn.LayerNorm(input_size),
+            nn.GELU(),
+        ) if pre_norm else nn.Identity()
+        self.upsample_factor = upsample_factor
+        self._real_output_dim = output_size
+        hidden_size *= upsample_factor
+        output_size *= (upsample_factor ** 2)
+        self.fc1 = nn.Linear(input_size, hidden_size, device=device)
+        blocks = []
+        for _ in range(num_inner):
+            blocks.append(nn.Sequential(
+                nn.LayerNorm(hidden_size, device=device),
+                nn.GELU(),
+                nn.Linear(hidden_size, hidden_size, device=device),
+            ))
+        self.blocks = nn.ModuleList(blocks)
+        self.final = nn.Sequential(
+            nn.LayerNorm(hidden_size, device=device),
+            nn.GELU(),
+            nn.Linear(hidden_size, output_size, device=device),
+        )
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.pre_norm(x)
+        x = self.fc1(x)
+        for block in self.blocks:
+            x = x + block(x)
+        x = self.final(x)
+        if self.upsample_factor > 1:
+            h = w = int(math.sqrt(x.shape[1]))
+            x = rearrange(x, 'b (h w) (u1 u2 c) -> b (u1 h u2 w) c',
+                          h=h, w=w, u1=self.upsample_factor, u2=self.upsample_factor,
+                          c=self._real_output_dim)
+        return x
+MLP_FACTORY = {
+    'v1': MLP,
+    'v2': MLP2,
+}
+def strip_prefix(state: Dict[str, torch.Tensor], prefix: str):
+    state = {
+        k[len(prefix):]: v
+        for k, v in state.items()
+        if k.startswith(prefix)
+    }
+    return state
+def get_mlp_info_from_state(version: str, state: Dict[str, torch.Tensor], prefix: str = ''):
+    state = strip_prefix(state, prefix)
+    if version == 'v1':
+        hidden_dim, input_dim = state['fc1.weight'].shape
+        output_dim = state['fc2.weight'].shape[0]
+        for num_inner in range(1000):
+            k = f'inner.{num_inner}.0.weight'
+            if k not in state:
+                break
+    elif version == 'v2':
+        hidden_dim, input_dim = state['fc1.weight'].shape
+        output_dim = state['final.2.weight'].shape[0]
+        for num_inner in range(1000):
+            k = f'blocks.{num_inner}.0.weight'
+            if k not in state:
+                break
+    else:
+        raise ValueError(f'Unsupported MLP version: {version}')
+    return input_dim, hidden_dim, output_dim, num_inner
+def create_mlp_from_state(version: str, state: Dict[str, torch.Tensor], prefix: str = ''):
+    state = strip_prefix(state, prefix)
+    input_dim, hidden_dim, output_dim, num_inner = get_mlp_info_from_state(version, state)
+    ret: nn.Module = MLP_FACTORY[version](input_dim, hidden_dim, output_dim, num_inner)
+    ret.load_state_dict(state)
+    return ret

adaptor_registry.py ADDED Viewed

	@@ -0,0 +1,37 @@

+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+from argparse import Namespace
+from typing import Dict, Any
+import torch
+from .adaptor_generic import GenericAdaptor, AdaptorBase
+# Aliases used in the annotations below: a string-keyed mapping of arbitrary
+# values, and a string-keyed mapping of tensors.
+dict_t = Dict[str, Any]
+state_t = Dict[str, torch.Tensor]
+class AdaptorRegistry:
+    def __init__(self):
+        self._registry = {}
+    def register_adaptor(self, name):
+        def decorator(factory_function):
+            if name in self._registry:
+                raise ValueError(f"Model '{name}' already registered")
+            self._registry[name] = factory_function
+            return factory_function
+        return decorator
+    def create_adaptor(self, name, main_config: Namespace, adaptor_config: dict_t, state: state_t) -> AdaptorBase:
+        if name not in self._registry:
+            return GenericAdaptor(main_config, adaptor_config, state)
+        return self._registry[name](main_config, adaptor_config, state)
+# Module-level registry instance, created once when this module is imported.
+adaptor_registry = AdaptorRegistry()

hf_model.py CHANGED Viewed

@@ -21,7 +21,11 @@ from transformers import PretrainedConfig, PreTrainedModel
 from .common import RESOURCE_MAP, DEFAULT_VERSION
-# Force import of eradio_model in order to register it.
 from .eradio_model import eradio
 from .radio_model import create_model_from_args
 from .radio_model import RADIOModel as RADIOModelBase, Resolution

 from .common import RESOURCE_MAP, DEFAULT_VERSION
+# Import all required modules.
+from .adaptor_base import AdaptorBase, RadioOutput, AdaptorInput
+from .adaptor_registry import adaptor_registry
+from .enable_cpe_support import enable_cpe
+from .enable_spectral_reparam import configure_spectral_reparam_from_args
 from .eradio_model import eradio
 from .radio_model import create_model_from_args
 from .radio_model import RADIOModel as RADIOModelBase, Resolution