Spaces:

kfoughali
/

serpent

Sleeping

App Files Files Community

kfoughali committed on Jul 29

Commit

a7a0326

verified ·

1 Parent(s): 93db32e

Update core/graph_mamba.py

Browse files

Files changed (1) hide show

core/graph_mamba.py +113 -200

core/graph_mamba.py CHANGED Viewed

@@ -1,16 +1,15 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from torch_geometric.utils import degree, to_dense_batch
 import networkx as nx
-import numpy as np
 import logging
 logger = logging.getLogger(__name__)
 class MambaBlock(nn.Module):
-    """Enhanced Mamba block with optimizations"""
-    def __init__(self, d_model, d_state=16, d_conv=4, expand=2):
         super().__init__()
         self.d_model = d_model
         self.d_inner = int(expand * d_model)
@@ -27,6 +26,9 @@ class MambaBlock(nn.Module):
         self.D = nn.Parameter(torch.ones(self.d_inner))
         self.out_proj = nn.Linear(self.d_inner, d_model, bias=False)
     def forward(self, x):
         batch, length, d_model = x.shape
         xz = self.in_proj(x)
@@ -36,10 +38,11 @@ class MambaBlock(nn.Module):
         x = self.conv1d(x)[:, :, :length]
         x = x.transpose(1, 2)
         x = self.act(x)
         y = self.selective_scan(x)
         y = y * self.act(z)
-        return self.out_proj(y)
     def selective_scan(self, x):
         batch, length, d_inner = x.shape
@@ -61,255 +64,165 @@ class MambaBlock(nn.Module):
         return torch.stack(outputs, dim=1)
-class EnhancedGraphOrdering:
-    """Advanced graph ordering strategies"""
     @staticmethod
-    def pagerank_ordering(edge_index, num_nodes):
-        """PageRank-based ordering preserving importance"""
-        try:
-            G = nx.Graph()
-            if edge_index.size(1) > 0:
-                edges = edge_index.t().cpu().numpy()
-                G.add_edges_from(edges)
-            G.add_nodes_from(range(num_nodes))
-            pagerank = nx.pagerank(G, max_iter=50)
-            order = sorted(range(num_nodes), key=lambda x: pagerank.get(x, 0), reverse=True)
-            return torch.tensor(order, dtype=torch.long)
-        except:
-            return torch.arange(num_nodes, dtype=torch.long)
     @staticmethod
-    def community_aware_ordering(edge_index, num_nodes):
-        """Community-preserving ordering"""
-        try:
-            G = nx.Graph()
-            if edge_index.size(1) > 0:
-                edges = edge_index.t().cpu().numpy()
-                G.add_edges_from(edges)
-            G.add_nodes_from(range(num_nodes))
-            communities = nx.community.greedy_modularity_communities(G)
-            order = []
-            for community in communities:
-                # Sort within community by degree
-                community_list = list(community)
-                degrees = {node: G.degree(node) for node in community_list}
-                community_sorted = sorted(community_list, key=lambda x: degrees[x], reverse=True)
-                order.extend(community_sorted)
-            return torch.tensor(order, dtype=torch.long)
-        except:
-            return torch.arange(num_nodes, dtype=torch.long)
-class StructuralEncoding(nn.Module):
-    """Multi-faceted structural encoding"""
-    def __init__(self, d_model, max_nodes=5000, max_degree=100):
         super().__init__()
-        self.pos_encoding = nn.Embedding(max_nodes, d_model)
         self.degree_encoding = nn.Embedding(max_degree, d_model)
-        self.centrality_proj = nn.Linear(1, d_model)
-        self.layer_norm = nn.LayerNorm(d_model)
-    def forward(self, x, edge_index, node_order=None):
-        num_nodes = x.size(0)
-        device = x.device
-        # Position encoding
-        positions = torch.arange(num_nodes, device=device).clamp(max=self.pos_encoding.num_embeddings-1)
-        pos_emb = self.pos_encoding(positions)
-        # Degree encoding
-        degrees = degree(edge_index[0], num_nodes).long().clamp(max=self.degree_encoding.num_embeddings-1)
-        degree_emb = self.degree_encoding(degrees)
-        # Simple centrality (normalized degree)
-        centrality = degrees.float() / max(degrees.max().item(), 1.0)
-        centrality_emb = self.centrality_proj(centrality.unsqueeze(-1))
-        # Combine encodings
-        structural_emb = pos_emb + degree_emb + centrality_emb
-        return self.layer_norm(x + structural_emb)
-class MultiScaleGraphMamba(nn.Module):
-    """Multi-scale processing with different orderings"""
-    def __init__(self, d_model, n_layers=3):
-        super().__init__()
-        self.d_model = d_model
-        # Different scale processors
-        self.local_mamba = nn.ModuleList([MambaBlock(d_model) for _ in range(n_layers//2)])
-        self.global_mamba = nn.ModuleList([MambaBlock(d_model) for _ in range(n_layers//2)])
-        # Fusion layers
-        self.scale_fusion = nn.Linear(d_model * 2, d_model)
         self.layer_norm = nn.LayerNorm(d_model)
     def forward(self, x, edge_index):
         num_nodes = x.size(0)
-        # Different orderings
-        local_order = torch.arange(num_nodes)  # BFS equivalent
-        global_order = EnhancedGraphOrdering.pagerank_ordering(edge_index, num_nodes)
-        # Process local scale
-        x_local = x[local_order].unsqueeze(0)
-        for layer in self.local_mamba:
-            x_local = x_local + layer(x_local)
-        x_local = x_local.squeeze(0)
-        # Process global scale
-        x_global = x[global_order].unsqueeze(0)
-        for layer in self.global_mamba:
-            x_global = x_global + layer(x_global)
-        x_global = x_global.squeeze(0)
-        # Restore original order
-        local_restored = torch.zeros_like(x_local)
-        global_restored = torch.zeros_like(x_global)
-        local_restored[local_order] = x_local
-        global_restored[global_order] = x_global
-        # Fuse scales
-        fused = torch.cat([local_restored, global_restored], dim=-1)
-        return self.layer_norm(self.scale_fusion(fused))
 class GraphMamba(nn.Module):
-    """Enhanced GraphMamba with accuracy improvements"""
     def __init__(self, config):
         super().__init__()
         self.config = config
-        d_model = config['model']['d_model']
-        n_layers = config['model']['n_layers']
-        self.ordering_strategy = config['ordering']['strategy']
-        # Input projection
-        self.input_proj = nn.Linear(config.get('input_dim', 1433), d_model)
-        # Structural encoding
-        self.structural_encoding = StructuralEncoding(d_model)
-        # Multi-scale processing
-        self.multi_scale = MultiScaleGraphMamba(d_model, n_layers)
-        # Additional Mamba layers
         self.mamba_layers = nn.ModuleList([
-            MambaBlock(d_model) for _ in range(max(1, n_layers - 2))
         ])
-        # Layer norms
         self.layer_norms = nn.ModuleList([
-            nn.LayerNorm(d_model) for _ in range(len(self.mamba_layers))
         ])
-        # Output projection
         self.output_proj = nn.Linear(d_model, d_model)
-        self.dropout = nn.Dropout(config['model']['dropout'])
-        # For node classification
         self.classifier = None
-    def _get_ordering(self, edge_index, num_nodes):
-        """Get node ordering based on strategy"""
-        if self.ordering_strategy == 'pagerank':
-            return EnhancedGraphOrdering.pagerank_ordering(edge_index, num_nodes)
-        elif self.ordering_strategy == 'community':
-            return EnhancedGraphOrdering.community_aware_ordering(edge_index, num_nodes)
-        elif self.ordering_strategy == 'spectral':
-            return self._spectral_ordering(edge_index, num_nodes)
-        else:  # BFS default
-            return torch.arange(num_nodes, dtype=torch.long)
-    def _spectral_ordering(self, edge_index, num_nodes):
-        """Spectral ordering with fallback"""
-        try:
-            from torch_geometric.utils import get_laplacian
-            edge_index_lap, edge_weight = get_laplacian(edge_index, num_nodes=num_nodes)
-            # Simple degree-based approximation
-            degrees = degree(edge_index[0], num_nodes)
-            return torch.argsort(degrees, descending=True)
-        except:
-            return torch.arange(num_nodes, dtype=torch.long)
     def forward(self, x, edge_index, batch=None):
-        """Enhanced forward pass"""
-        # Input projection
-        h = self.input_proj(x)
-        # Add structural information
-        h = self.structural_encoding(h, edge_index)
-        # Multi-scale processing
-        h = self.multi_scale(h, edge_index)
-        # Additional sequential processing
-        order = self._get_ordering(edge_index, h.size(0))
         h_ordered = h[order].unsqueeze(0)
-        for mamba, ln in zip(self.mamba_layers, self.layer_norms):
             residual = h_ordered
             h_ordered = ln(h_ordered)
-            h_ordered = residual + self.dropout(mamba(h_ordered))
-        # Restore original order
-        h_restored = torch.zeros_like(h_ordered.squeeze(0))
-        h_restored[order] = h_ordered.squeeze(0)
-        return self.output_proj(h_restored)
     def _init_classifier(self, num_classes, device):
-        """Initialize classifier head"""
         if self.classifier is None:
-            self.classifier = nn.Linear(self.config['model']['d_model'], num_classes).to(device)
     def get_performance_stats(self):
-        """Get model performance statistics"""
         total_params = sum(p.numel() for p in self.parameters())
         return {
             'total_params': total_params,
             'device': next(self.parameters()).device,
             'dtype': next(self.parameters()).dtype,
-            'ordering_strategy': self.ordering_strategy
         }
-class HybridGraphMamba(nn.Module):
-    """Hybrid approach with minimal GNN overhead"""
-    def __init__(self, config):
-        super().__init__()
-        from torch_geometric.nn import GCNConv
-        d_model = config['model']['d_model']
-        self.graph_mamba = GraphMamba(config)
-        self.gcn = GCNConv(d_model, d_model)
-        self.gate = nn.Linear(d_model, 1)
-        self.fusion = nn.Linear(d_model * 2, d_model)
-    def forward(self, x, edge_index, batch=None):
-        # Get both representations
-        mamba_out = self.graph_mamba(x, edge_index, batch)
-        gcn_out = self.gcn(mamba_out, edge_index)
-        # Learned fusion
-        gate_weight = torch.sigmoid(self.gate(mamba_out))
-        weighted = gate_weight * mamba_out + (1 - gate_weight) * gcn_out
-        # Final fusion
-        combined = torch.cat([mamba_out, weighted], dim=-1)
-        return self.fusion(combined)
-    def _init_classifier(self, num_classes, device):
-        """Initialize classifier for hybrid model"""
-        if not hasattr(self, 'classifier') or self.classifier is None:
-            self.classifier = nn.Linear(self.config['model']['d_model'], num_classes).to(device)
-    def get_performance_stats(self):
-        """Get hybrid model stats"""
-        return self.graph_mamba.get_performance_stats()

 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+from torch_geometric.utils import degree
 import networkx as nx
 import logging
 logger = logging.getLogger(__name__)
 class MambaBlock(nn.Module):
+    """Heavily regularized Mamba block"""
+    def __init__(self, d_model, d_state=4, d_conv=4, expand=2):
         super().__init__()
         self.d_model = d_model
         self.d_inner = int(expand * d_model)
         self.D = nn.Parameter(torch.ones(self.d_inner))
         self.out_proj = nn.Linear(self.d_inner, d_model, bias=False)
+        # Heavy regularization
+        self.dropout = nn.Dropout(0.3)
     def forward(self, x):
         batch, length, d_model = x.shape
         xz = self.in_proj(x)
         x = self.conv1d(x)[:, :, :length]
         x = x.transpose(1, 2)
         x = self.act(x)
+        x = self.dropout(x)
         y = self.selective_scan(x)
         y = y * self.act(z)
+        return self.dropout(self.out_proj(y))
     def selective_scan(self, x):
         batch, length, d_inner = x.shape
         return torch.stack(outputs, dim=1)
class GraphDataAugmentation:
    """Stochastic feature and edge perturbations used to combat overfitting.

    Both helpers are static and draw from the global torch RNG, so their
    output is reproducible only under a fixed ``torch.manual_seed``.
    """

    @staticmethod
    def augment_features(x, noise_level=0.1, dropout_prob=0.2):
        """Return a noisy, randomly masked copy of ``x``.

        Args:
            x: Floating-point feature tensor of any rank >= 1. A tensor
                whose first dimension is empty is returned unchanged.
            noise_level: Scale applied to the standard-normal noise that is
                added to every entry.
            dropout_prob: Probability that an individual entry is zeroed.
                Surviving entries are not rescaled.

        Returns:
            A tensor with the same shape, dtype and device as ``x``.
        """
        if x.size(0) == 0:
            return x

        # Additive Gaussian feature noise.
        x_aug = x + torch.randn_like(x) * noise_level

        # Element-wise keep mask. It is drawn with the full shape of ``x`` so
        # inputs of any rank work, and cast to ``x.dtype`` so float16 /
        # bfloat16 features are not silently promoted to float32.
        keep = torch.rand(x.shape, device=x.device) > dropout_prob
        return x_aug * keep.to(x.dtype)

    @staticmethod
    def augment_edges(edge_index, drop_prob=0.1):
        """Randomly drop edges from ``edge_index``.

        Args:
            edge_index: Tensor whose second dimension indexes edges (one
                column per edge). An input with no columns is returned
                unchanged.
            drop_prob: Probability that an individual column is removed.
                Columns are sampled independently, so if an undirected edge
                is stored as two directed columns, each direction is dropped
                on its own.

        Returns:
            ``edge_index`` restricted to the surviving columns.
        """
        if edge_index.size(1) == 0:
            return edge_index

        keep = torch.rand(edge_index.size(1), device=edge_index.device) > drop_prob
        return edge_index[:, keep]
class LightStructuralEncoding(nn.Module):
    """Lightweight structural encoding: adds a learned node-degree embedding.

    Args:
        d_model: Width of the node features and of the degree embedding.
        max_degree: Number of rows in the degree embedding table. Degrees at
            or above this value share the last row (``max_degree - 1``).
    """

    def __init__(self, d_model, max_degree=50):
        super().__init__()
        self.degree_encoding = nn.Embedding(max_degree, d_model)
        self.layer_norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x, edge_index):
        """Add degree embeddings to ``x``, then apply LayerNorm and dropout.

        Args:
            x: Node features; assumed to be ``(num_nodes, d_model)`` — the
                first dimension is taken as the node count.
            edge_index: Edge tensor; only its first row is used to count
                how often each node index occurs.

        Returns:
            Tensor with the same shape as ``x``.
        """
        num_nodes = x.size(0)

        # Clamp against the real table size instead of a hard-coded 49, so
        # a non-default ``max_degree`` can neither index past the embedding
        # table nor leave rows unreachable.
        max_index = self.degree_encoding.num_embeddings - 1
        degrees = degree(edge_index[0], num_nodes).long().clamp(max=max_index)
        degree_emb = self.degree_encoding(degrees)

        # Combine, normalise, then regularise with heavy dropout.
        combined = self.layer_norm(x + degree_emb)
        return self.dropout(combined)
 class GraphMamba(nn.Module):
+    """Heavily regularized GraphMamba to prevent overfitting"""
     def __init__(self, config):
         super().__init__()
         self.config = config
+        d_model = config['model']['d_model']  # Should be 64
+        n_layers = config['model']['n_layers']  # Should be 2
+        input_dim = config.get('input_dim', 1433)
+        # Minimal architecture
+        self.input_proj = nn.Linear(input_dim, d_model)
+        self.input_dropout = nn.Dropout(0.5)
+        # Light structural encoding
+        self.structural_encoding = LightStructuralEncoding(d_model)
+        # Minimal Mamba layers
         self.mamba_layers = nn.ModuleList([
+            MambaBlock(d_model, d_state=4) for _ in range(n_layers)
         ])
+        # Layer norms with dropout
         self.layer_norms = nn.ModuleList([
+            nn.LayerNorm(d_model) for _ in range(n_layers)
         ])
+        self.hidden_dropout = nn.Dropout(0.5)
+        self.output_dropout = nn.Dropout(0.3)
+        # Simple output
         self.output_proj = nn.Linear(d_model, d_model)
+        # Data augmentation
+        self.augmentation = GraphDataAugmentation()
+        # Classifier will be added later
         self.classifier = None
     def forward(self, x, edge_index, batch=None):
+        # Apply data augmentation during training
+        if self.training:
+            x = self.augmentation.augment_features(x)
+            edge_index = self.augmentation.augment_edges(edge_index)
+        # Input projection with dropout
+        h = self.input_dropout(self.input_proj(x))
+        # Add minimal structural information
+        h = self.structural_encoding(h, edge_index)
+        # Simple BFS ordering only
+        order = torch.arange(h.size(0), device=h.device)
         h_ordered = h[order].unsqueeze(0)
+        # Process through minimal Mamba layers
+        for i, (mamba, ln) in enumerate(zip(self.mamba_layers, self.layer_norms)):
             residual = h_ordered
             h_ordered = ln(h_ordered)
+            h_ordered = residual + mamba(h_ordered)
+            h_ordered = self.hidden_dropout(h_ordered)
+        # Restore order and final processing
+        h_restored = h_ordered.squeeze(0)
+        h_out = self.output_dropout(self.output_proj(h_restored))
+        return h_out
     def _init_classifier(self, num_classes, device):
+        """Initialize heavily regularized classifier"""
         if self.classifier is None:
+            self.classifier = nn.Sequential(
+                nn.Dropout(0.5),
+                nn.Linear(self.config['model']['d_model'], num_classes)
+            ).to(device)
     def get_performance_stats(self):
+        """Get model statistics"""
         total_params = sum(p.numel() for p in self.parameters())
         return {
             'total_params': total_params,
             'device': next(self.parameters()).device,
             'dtype': next(self.parameters()).dtype,
+            'model_size': f"{total_params/1000:.1f}K parameters"
         }
def create_regularized_config():
    """Build the default configuration for training on small datasets.

    Returns:
        A freshly constructed nested dict with the sections ``'model'``,
        ``'data'``, ``'training'`` and ``'ordering'`` plus a top-level
        ``'input_dim'`` entry.
    """
    model_cfg = {}
    model_cfg['d_model'] = 64            # Reduced from 128
    model_cfg['d_state'] = 4             # Reduced from 8
    model_cfg['d_conv'] = 4
    model_cfg['expand'] = 2
    model_cfg['n_layers'] = 2            # Reduced from 3
    model_cfg['dropout'] = 0.5           # Increased from 0.1

    data_cfg = {}
    data_cfg['batch_size'] = 1           # Full batch for small datasets
    data_cfg['test_split'] = 0.2

    training_cfg = {}
    training_cfg['learning_rate'] = 0.0005   # Reduced from 0.001
    training_cfg['weight_decay'] = 0.01      # High regularization
    training_cfg['epochs'] = 200
    training_cfg['patience'] = 10            # More patient early stopping
    training_cfg['warmup_epochs'] = 10
    training_cfg['min_lr'] = 1e-6

    ordering_cfg = {}
    ordering_cfg['strategy'] = 'bfs'         # Simple strategy only
    ordering_cfg['preserve_locality'] = True

    config = {}
    config['model'] = model_cfg
    config['data'] = data_cfg
    config['training'] = training_cfg
    config['ordering'] = ordering_cfg
    config['input_dim'] = 1433
    return config