Spaces:

kfoughali
/

serpent

Running

App Files Files Community

kfoughali committed on Jul 29

Commit

e4d5cc2

verified ·

1 Parent(s): 8e24e05

Update core/graph_mamba.py

Browse files

Files changed (1) hide show

core/graph_mamba.py +81 -36

core/graph_mamba.py CHANGED Viewed

@@ -5,8 +5,7 @@ from .graph_sequencer import GraphSequencer, PositionalEncoder
 class GraphMamba(nn.Module):
     """
-    Production Graph-Mamba model
-    Device-safe implementation with dynamic handling
     """
     def __init__(self, config):
@@ -19,13 +18,13 @@ class GraphMamba(nn.Module):
         self.ordering_strategy = config['ordering']['strategy']
         # Input projection (dynamic input dimension)
-        self.input_proj = None  # Will be initialized on first forward
         # Positional encoding
         self.pos_encoder = PositionalEncoder()
-        self.pos_embed = nn.Linear(11, self.d_model)  # 1 + 10 distances
-        # Mamba layers
         self.mamba_layers = nn.ModuleList([
             MambaBlock(
                 d_model=self.d_model,
@@ -48,27 +47,36 @@ class GraphMamba(nn.Module):
         # Graph sequencer
         self.sequencer = GraphSequencer()
-        # Classification head (for demo)
         self.classifier = None
     def _init_input_proj(self, input_dim, device):
         """Initialize input projection dynamically"""
         if self.input_proj is None:
-            self.input_proj = nn.Linear(input_dim, self.d_model).to(device)
     def _init_classifier(self, num_classes, device):
         """Initialize classifier dynamically"""
         if self.classifier is None:
-            self.classifier = nn.Linear(self.d_model, num_classes).to(device)
     def forward(self, x, edge_index, batch=None):
         """
-        Forward pass with device-safe handling
-        Args:
-            x: Node features (num_nodes, input_dim)
-            edge_index: Edge connectivity (2, num_edges)
-            batch: Batch assignment (num_nodes,) - optional
         """
         num_nodes = x.size(0)
         input_dim = x.size(1)
@@ -93,22 +101,31 @@ class GraphMamba(nn.Module):
         return h
     def _process_single_graph(self, h, edge_index):
-        """Process a single graph - device safe"""
         num_nodes = h.size(0)
         device = h.device
         # Ensure edge_index is on correct device
         edge_index = edge_index.to(device)
-        # Get ordering
-        if self.ordering_strategy == "spectral":
-            order = self.sequencer.spectral_ordering(edge_index, num_nodes)
-        elif self.ordering_strategy == "degree":
-            order = self.sequencer.degree_ordering(edge_index, num_nodes)
-        elif self.ordering_strategy == "community":
-            order = self.sequencer.community_ordering(edge_index, num_nodes)
-        else:  # default to BFS
-            order = self.sequencer.bfs_ordering(edge_index, num_nodes)
         # Ensure order is on correct device
         order = order.to(device)
@@ -125,10 +142,17 @@ class GraphMamba(nn.Module):
         h_ordered = h[order] + pos_embed[order]  # Add positional encoding
         h_ordered = h_ordered.unsqueeze(0)  # (1, num_nodes, d_model)
-        # Process through Mamba layers
-        for mamba, ln in zip(self.mamba_layers, self.layer_norms):
-            # Pre-norm residual connection
-            h_ordered = h_ordered + self.dropout_layer(mamba(ln(h_ordered)))
         # Restore original order
         h_out = h_ordered.squeeze(0)  # (num_nodes, d_model)
@@ -140,7 +164,7 @@ class GraphMamba(nn.Module):
         return h_final
     def _process_batch(self, h, edge_index, batch):
-        """Process batched graphs - device safe"""
         device = h.device
         batch = batch.to(device)
         edge_index = edge_index.to(device)
@@ -180,12 +204,19 @@ class GraphMamba(nn.Module):
         return h_out
     def get_graph_embedding(self, h, batch=None):
-        """Get graph-level representation"""
         if batch is None:
-            # Single graph - mean pooling
-            return h.mean(dim=0, keepdim=True)
         else:
-            # Batched graphs - manual pooling to avoid dependencies
             device = h.device
             batch = batch.to(device)
             batch_size = batch.max().item() + 1
@@ -194,9 +225,23 @@ class GraphMamba(nn.Module):
             for b in range(batch_size):
                 mask = batch == b
                 if mask.any():
-                    graph_emb = h[mask].mean(dim=0)
                     graph_embeddings.append(graph_emb)
                 else:
-                    graph_embeddings.append(torch.zeros(h.size(1), device=device))
-            return torch.stack(graph_embeddings)

 class GraphMamba(nn.Module):
     """
+    Production Graph-Mamba model with training optimizations
     """
     def __init__(self, config):
         self.ordering_strategy = config['ordering']['strategy']
         # Input projection (dynamic input dimension)
+        self.input_proj = None
         # Positional encoding
         self.pos_encoder = PositionalEncoder()
+        self.pos_embed = nn.Linear(11, self.d_model)
+        # Mamba layers with residual connections
         self.mamba_layers = nn.ModuleList([
             MambaBlock(
                 d_model=self.d_model,
         # Graph sequencer
         self.sequencer = GraphSequencer()
+        # Classification head (initialized dynamically)
         self.classifier = None
+        # Cache for efficiency
+        self._cache = {}
     def _init_input_proj(self, input_dim, device):
         """Initialize input projection dynamically"""
         if self.input_proj is None:
+            self.input_proj = nn.Sequential(
+                nn.Linear(input_dim, self.d_model),
+                nn.LayerNorm(self.d_model),
+                nn.ReLU(),
+                nn.Dropout(self.dropout * 0.5)
+            ).to(device)
     def _init_classifier(self, num_classes, device):
         """Initialize classifier dynamically"""
         if self.classifier is None:
+            self.classifier = nn.Sequential(
+                nn.Linear(self.d_model, self.d_model // 2),
+                nn.LayerNorm(self.d_model // 2),
+                nn.ReLU(),
+                nn.Dropout(self.dropout),
+                nn.Linear(self.d_model // 2, num_classes)
+            ).to(device)
     def forward(self, x, edge_index, batch=None):
         """
+        Forward pass with training optimizations
         """
         num_nodes = x.size(0)
         input_dim = x.size(1)
         return h
     def _process_single_graph(self, h, edge_index):
+        """Process a single graph with caching"""
         num_nodes = h.size(0)
         device = h.device
         # Ensure edge_index is on correct device
         edge_index = edge_index.to(device)
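+        # Cache key for ordering. NOTE: it uses only the strategy, node count and edge
+        # count, so different graphs of equal size reuse one cached ordering in training.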
+        cache_key = f"{self.ordering_strategy}_{num_nodes}_{edge_index.shape[1]}"
+        # Get ordering (with caching during training)
+        if cache_key not in self._cache or not self.training:
+            if self.ordering_strategy == "spectral":
+                order = self.sequencer.spectral_ordering(edge_index, num_nodes)
+            elif self.ordering_strategy == "degree":
+                order = self.sequencer.degree_ordering(edge_index, num_nodes)
+            elif self.ordering_strategy == "community":
+                order = self.sequencer.community_ordering(edge_index, num_nodes)
+            else:  # default to BFS
+                order = self.sequencer.bfs_ordering(edge_index, num_nodes)
+            if self.training:
+                self._cache[cache_key] = order
+        else:
+            order = self._cache[cache_key]
         # Ensure order is on correct device
         order = order.to(device)
         h_ordered = h[order] + pos_embed[order]  # Add positional encoding
         h_ordered = h_ordered.unsqueeze(0)  # (1, num_nodes, d_model)
+        # Process through Mamba layers with residual connections
+        for i, (mamba, ln) in enumerate(zip(self.mamba_layers, self.layer_norms)):
+            # Pre-norm residual connection with gradient scaling
+            residual = h_ordered
+            h_ordered = ln(h_ordered)
+            h_ordered = mamba(h_ordered)
+            h_ordered = residual + self.dropout_layer(h_ordered)
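+            # Layer-wise activation scaling (training only, not a learning-rate change):
+            # layer i's output is multiplied by (1 - 0.1 * i / n_layers)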
+            if self.training:
+                h_ordered = h_ordered * (1.0 - 0.1 * i / self.n_layers)
         # Restore original order
         h_out = h_ordered.squeeze(0)  # (num_nodes, d_model)
         return h_final
     def _process_batch(self, h, edge_index, batch):
+        """Process batched graphs efficiently"""
         device = h.device
         batch = batch.to(device)
         edge_index = edge_index.to(device)
         return h_out
     def get_graph_embedding(self, h, batch=None):
+        """Get graph-level representation with multiple pooling"""
         if batch is None:
+            # Single graph - multiple pooling strategies
+            mean_pool = h.mean(dim=0, keepdim=True)
+            max_pool = h.max(dim=0)[0].unsqueeze(0)
+            # Attention pooling
+            attn_weights = torch.softmax(h.sum(dim=1), dim=0)
+            attn_pool = (h * attn_weights.unsqueeze(1)).sum(dim=0, keepdim=True)
+            return torch.cat([mean_pool, max_pool, attn_pool], dim=1)
         else:
+            # Batched graphs
             device = h.device
             batch = batch.to(device)
             batch_size = batch.max().item() + 1
             for b in range(batch_size):
                 mask = batch == b
                 if mask.any():
+                    batch_h = h[mask]
+                    # Multiple pooling for this graph
+                    mean_pool = batch_h.mean(dim=0)
+                    max_pool = batch_h.max(dim=0)[0]
+                    attn_weights = torch.softmax(batch_h.sum(dim=1), dim=0)
+                    attn_pool = (batch_h * attn_weights.unsqueeze(1)).sum(dim=0)
+                    graph_emb = torch.cat([mean_pool, max_pool, attn_pool])
                     graph_embeddings.append(graph_emb)
                 else:
+                    # Empty graph
+                    graph_embeddings.append(torch.zeros(h.size(1) * 3, device=device))
+            return torch.stack(graph_embeddings)
+    def clear_cache(self):
+        """Clear ordering cache"""
+        self._cache.clear()