Spaces:

kfoughali
/

serpent

Running

App Files Files Community

kfoughali committed on Jul 30

Commit

b74043a

verified ·

1 Parent(s): e4e8b6b

Update core/graph_mamba.py

Browse files

Files changed (1) hide show

core/graph_mamba.py +299 -219

core/graph_mamba.py CHANGED Viewed

@@ -8,206 +8,173 @@ import logging
 logger = logging.getLogger(__name__)
-class CognitiveMomentumEngine(nn.Module):
-    """Core cognitive momentum system from the document"""
-    def __init__(self, d_model):
         super().__init__()
         self.d_model = d_model
-        # Momentum tracking
-        self.register_buffer('momentum_vectors', torch.zeros(d_model))
-        self.register_buffer('cognitive_mass', torch.ones(d_model))
-        self.register_buffer('kinetic_energy', torch.zeros(d_model))
-        self.register_buffer('potential_energy', torch.zeros(d_model))
-        # Field interactions
-        self.attraction_projection = nn.Linear(d_model, d_model)
-        self.repulsion_projection = nn.Linear(d_model, d_model)
-        # Crystallization threshold
-        self.crystallization_threshold = 0.1
-        self.memory_decay = 0.99
-    def update_momentum(self, concept_features, force, dt=0.1):
-        """Apply cognitive momentum physics"""
-        # F = ma => a = F/m
-        acceleration = force / (self.cognitive_mass + 1e-8)
-        # Update velocity: v = v₀ + at
-        current_velocity = self.momentum_vectors / (self.cognitive_mass + 1e-8)
-        new_velocity = current_velocity + acceleration * dt
-        # Update momentum: p = mv
-        self.momentum_vectors = self.cognitive_mass * new_velocity
-        # Update energy
-        self.kinetic_energy = 0.5 * self.cognitive_mass * (new_velocity ** 2)
-        return self.momentum_vectors
-    def crystallize_knowledge(self):
-        """Compress low-momentum concepts"""
-        low_momentum_mask = torch.abs(self.momentum_vectors) < self.crystallization_threshold
-        # Compress crystallized knowledge
-        crystallized_pattern = self.momentum_vectors[low_momentum_mask].mean()
-        # Reset crystallized components
-        self.momentum_vectors[low_momentum_mask] = crystallized_pattern * 0.1
-        return crystallized_pattern
-    def forward(self, x):
-        """Apply momentum to features"""
-        if x.dim() == 2:
-            x = x.unsqueeze(0)
-        batch_size, seq_len, d_model = x.shape
-        # Compute forces from feature interactions
-        attraction_force = self.attraction_projection(x)
-        repulsion_force = self.repulsion_projection(x)
-        # Net force
-        net_force = attraction_force - repulsion_force * 0.1
-        # Simple momentum application
-        momentum_enhanced = x + net_force * 0.1
-        # Crystallize periodically
-        if torch.rand(1) < 0.1:
-            self.crystallize_knowledge()
-        return momentum_enhanced
-class AstrocyteLayer(nn.Module):
-    """Multi-timescale processing with momentum"""
-    def __init__(self, d_model, astrocyte_ratio=2.0):
         super().__init__()
         self.d_model = d_model
-        self.d_astrocyte = int(d_model * astrocyte_ratio)
-        # Fast neuronal processing
-        self.neuron_fast = nn.Linear(d_model, d_model)
-        self.neuron_dropout = nn.Dropout(0.1)
-        # Slow astrocyte processing
-        self.astrocyte_slow = nn.Linear(d_model, self.d_astrocyte)
-        self.astrocyte_integration = nn.Linear(self.d_astrocyte, d_model)
-        self.astrocyte_dropout = nn.Dropout(0.1)
-        # Cognitive momentum
-        self.momentum_engine = CognitiveMomentumEngine(d_model)
-        # Multi-timescale gates
-        self.fast_gate = nn.Linear(d_model, d_model)
-        self.slow_gate = nn.Linear(self.d_astrocyte, d_model)
-        # Memory for slow dynamics
-        self.register_buffer('astrocyte_memory', torch.zeros(1, self.d_astrocyte))
-        self.memory_decay = 0.9
     def forward(self, x):
-        batch_size = x.size(0) if x.dim() == 3 else 1
         if x.dim() == 2:
-            x = x.unsqueeze(0)
-        if self.astrocyte_memory.size(0) != batch_size:
-            self.astrocyte_memory = torch.zeros(batch_size, self.d_astrocyte, device=x.device)
-        # Apply cognitive momentum
-        x_momentum = self.momentum_engine(x)
-        # Fast neuronal response
-        fast_out = self.neuron_dropout(torch.tanh(self.neuron_fast(x_momentum)))
-        # Slow astrocyte integration
-        astrocyte_input = self.astrocyte_slow(x_momentum)
-        self.astrocyte_memory = self.memory_decay * self.astrocyte_memory + (1 - self.memory_decay) * astrocyte_input.mean(dim=1)
-        slow_out = self.astrocyte_dropout(torch.tanh(self.astrocyte_integration(self.astrocyte_memory))).unsqueeze(1).expand(-1, x.size(1), -1)
-        # Multi-timescale gating
-        fast_gate = torch.sigmoid(self.fast_gate(x_momentum))
-        slow_gate = torch.sigmoid(self.slow_gate(self.astrocyte_memory)).unsqueeze(1).expand(-1, x.size(1), -1)
-        # Combine with momentum
-        output = fast_gate * fast_out + slow_gate * slow_out
-        return output.squeeze(0) if output.size(0) == 1 else output
-class PhysicsInformedMamba(nn.Module):
-    """Mamba with physics constraints and momentum"""
-    def __init__(self, d_model, d_state=8):
         super().__init__()
         self.d_model = d_model
-        self.d_inner = d_model * 2
-        self.d_state = d_state
-        self.in_proj = nn.Linear(d_model, self.d_inner * 2, bias=False)
-        self.conv1d = nn.Conv1d(self.d_inner, self.d_inner, 4, groups=self.d_inner, padding=3)
-        self.x_proj = nn.Linear(self.d_inner, d_state * 2 + 1, bias=False)
-        self.dt_proj = nn.Linear(1, self.d_inner, bias=True)
-        # Physics constraints
-        A = torch.arange(1, d_state + 1, dtype=torch.float32).unsqueeze(0).repeat(self.d_inner, 1)
-        self.A_log = nn.Parameter(torch.log(A))
-        self.D = nn.Parameter(torch.ones(self.d_inner))
-        self.out_proj = nn.Linear(self.d_inner, d_model, bias=False)
-        # Energy conservation
-        self.energy_projection = nn.Linear(d_model, d_model)
-    def forward(self, x):
-        if x.dim() == 2:
-            x = x.unsqueeze(0)
-        batch, length, _ = x.shape
-        # Energy conservation
-        total_energy = x.norm(dim=-1, keepdim=True)
-        xz = self.in_proj(x)
-        x_inner, z = xz.chunk(2, dim=-1)
-        # Convolution
-        x_inner = x_inner.transpose(1, 2)
-        x_inner = self.conv1d(x_inner)[:, :, :length]
-        x_inner = x_inner.transpose(1, 2)
-        x_inner = F.silu(x_inner)
-        # State space with physics
-        y = self.selective_scan(x_inner)
-        y = y * F.silu(z)
-        # Apply energy conservation
-        output = self.out_proj(y)
-        output_energy = output.norm(dim=-1, keepdim=True)
-        energy_scale = total_energy / (output_energy + 1e-8)
-        output = output * energy_scale
-        return output
-    def selective_scan(self, x):
-        batch, length, d_inner = x.shape
-        deltaBC = self.x_proj(x)
-        delta, B, C = torch.split(deltaBC, [1, self.d_state, self.d_state], dim=-1)
-        delta = F.softplus(self.dt_proj(delta))
-        deltaA = torch.exp(delta.unsqueeze(-1) * (-torch.exp(self.A_log)))
-        deltaB = delta.unsqueeze(-1) * B.unsqueeze(2)
-        states = torch.zeros(batch, d_inner, self.d_state, device=x.device)
-        outputs = []
-        for i in range(length):
-            states = deltaA[:, i] * states + deltaB[:, i] * x[:, i, :, None]
-            y = (states @ C[:, i, :, None]).squeeze(-1) + self.D * x[:, i]
-            outputs.append(y)
-        return torch.stack(outputs, dim=1)
-class CognitiveMambaGraphMamba(nn.Module):
-    """Revolutionary cognitive momentum architecture"""
     def __init__(self, config):
         super().__init__()
@@ -219,129 +186,243 @@ class CognitiveMambaGraphMamba(nn.Module):
         # Input processing
         self.input_proj = nn.Linear(input_dim, d_model)
         self.input_norm = nn.LayerNorm(d_model)
-        # GCN backbone for graph structure
         self.gcn_layers = nn.ModuleList([
             GCNConv(d_model, d_model) for _ in range(n_layers)
         ])
-        # Revolutionary components
         self.astrocyte_layers = nn.ModuleList([
             AstrocyteLayer(d_model) for _ in range(n_layers)
         ])
-        self.physics_mamba = PhysicsInformedMamba(d_model)
-        # Global cognitive momentum
-        self.global_momentum = CognitiveMomentumEngine(d_model)
-        # Layer norms
-        self.norms = nn.ModuleList([
             nn.LayerNorm(d_model) for _ in range(n_layers)
         ])
-        # Multi-path fusion
-        self.fusion_weights = nn.Parameter(torch.tensor([0.4, 0.3, 0.3]))  # GCN, Astrocyte, Mamba
-        self.dropout = nn.Dropout(0.1)
         self.classifier = None
     def forward(self, x, edge_index, batch=None):
         # Input processing
-        h = self.input_norm(self.input_proj(x))
-        # Multi-path processing with momentum
         for i in range(len(self.gcn_layers)):
             gcn = self.gcn_layers[i]
-            astrocyte = self.astrocyte_layers[i]
-            norm = self.norms[i]
-            # Path 1: GCN (graph structure)
-            h_gcn = F.relu(gcn(h, edge_index))
-            h_gcn = self.dropout(h_gcn)
-            # Path 2: Astrocyte (multi-timescale with momentum)
-            h_astrocyte = astrocyte(h.unsqueeze(0)).squeeze(0)
-            # Path 3: Physics-informed Mamba (sequential with physics)
-            h_mamba = self.physics_mamba(h.unsqueeze(0)).squeeze(0)
-            # Apply global cognitive momentum
-            h_combined = torch.stack([h_gcn, h_astrocyte, h_mamba], dim=0)  # (3, nodes, features)
-            h_combined = h_combined.permute(1, 0, 2)  # (nodes, 3, features)
-            h_momentum = self.global_momentum(h_combined.unsqueeze(0)).squeeze(0)  # (nodes, 3, features)
-            h_momentum = h_momentum.mean(dim=1)  # (nodes, features)
-            # Weighted fusion
-            weights = F.softmax(self.fusion_weights, dim=0)
-            h_fused = weights[0] * h_gcn + weights[1] * h_astrocyte + weights[2] * h_mamba + h_momentum * 0.1
-            # Residual + norm
-            h = norm(h + h_fused)
         return h
     def _init_classifier(self, num_classes, device):
         if self.classifier is None:
             self.classifier = nn.Sequential(
-                nn.Dropout(0.1),
                 nn.Linear(self.config['model']['d_model'], num_classes)
             ).to(device)
     def get_performance_stats(self):
         total_params = sum(p.numel() for p in self.parameters())
         return {
             'total_params': total_params,
             'device': next(self.parameters()).device,
             'dtype': next(self.parameters()).dtype,
             'model_size': f"{total_params/1000:.1f}K parameters"
         }
-class LegacyGraphMamba(nn.Module):
-    """Fallback simple version"""
     def __init__(self, config):
         super().__init__()
-        self.cognitive_mamba = CognitiveMambaGraphMamba(config)
         self.config = config
         self.classifier = None
     def forward(self, x, edge_index, batch=None):
-        return self.cognitive_mamba(x, edge_index, batch)
     def _init_classifier(self, num_classes, device):
-        self.classifier = nn.Sequential(
-            nn.Dropout(0.1),
-            nn.Linear(self.config['model']['d_model'], num_classes)
-        ).to(device)
-        self.cognitive_mamba.classifier = self.classifier
         return self.classifier
     def get_performance_stats(self):
-        return self.cognitive_mamba.get_performance_stats()
 def create_astrocyte_config():
-    """Revolutionary cognitive momentum configuration"""
     return {
         'model': {
-            'd_model': 128,
             'd_state': 8,
             'd_conv': 4,
             'expand': 2,
-            'n_layers': 4,
-            'dropout': 0.1
         },
         'data': {
             'batch_size': 1,
             'test_split': 0.2
         },
         'training': {
-            'learning_rate': 0.003,
-            'weight_decay': 0.001,
-            'epochs': 500,
-            'patience': 100,
-            'warmup_epochs': 25,
-            'min_lr': 1e-7,
             'label_smoothing': 0.0,
-            'max_gap': 0.3
         },
         'ordering': {
             'strategy': 'none',
@@ -350,9 +431,8 @@ def create_astrocyte_config():
         'input_dim': 1433
     }
-# Use simple working version for now
-AstrocyteGraphMamba = LegacyGraphMamba
-GraphMamba = LegacyGraphMamba
-HybridGraphMamba = LegacyGraphMamba
-QuantumEnhancedGraphMamba = LegacyGraphMamba
-create_regularized_config = create_astrocyte_config

 logger = logging.getLogger(__name__)
+class GraphDataAugmentation:
+    """Random perturbations for node features and edge lists.
+
+    Both helpers are static methods that draw fresh random numbers on every
+    call, so two calls with the same input generally return different
+    results unless the torch RNG is re-seeded in between.
+    """
+    @staticmethod
+    def augment_features(x, noise_level=0.1, dropout_prob=0.05):
+        """Return a noisy, randomly masked version of ``x``.
+
+        Args:
+            x: Feature tensor. Non-tensor inputs and tensors whose first
+                dimension is empty are returned unchanged.
+            noise_level: Multiplier applied to standard-normal noise.
+            dropout_prob: Threshold for a per-entry uniform draw; entries
+                whose draw is not above it are zeroed.
+
+        Returns:
+            ``(x + noise) * mask``. Surviving entries are not rescaled.
+        """
+        if not torch.is_tensor(x) or x.size(0) == 0:
+            return x
+        # Feature noise
+        noise = torch.randn_like(x) * noise_level
+        x_aug = x + noise
+        # Feature masking
+        mask = torch.rand(x.shape, device=x.device) > dropout_prob
+        return x_aug * mask.float()
+    @staticmethod
+    def augment_edges(edge_index, drop_prob=0.1):
+        """Return ``edge_index`` with a random subset of columns removed.
+
+        Args:
+            edge_index: Tensor indexed along dim 1; presumably the usual
+                ``(2, num_edges)`` layout -- TODO confirm against callers.
+                Non-tensor inputs and tensors with no columns are returned
+                unchanged.
+            drop_prob: Threshold for a per-column uniform draw; columns whose
+                draw is not above it are dropped.
+
+        Returns:
+            The kept columns, in their original order.
+        """
+        if not torch.is_tensor(edge_index) or edge_index.size(1) == 0:
+            return edge_index
+        # Each column is sampled independently of every other column.
+        edge_mask = torch.rand(edge_index.size(1), device=edge_index.device) > drop_prob
+        return edge_index[:, edge_mask]
+class SimpleMambaBlock(nn.Module):
+    """Gated state-space block: projection, depthwise conv, scan, gate.
+
+    ``forward`` expects a 3-D input (it unpacks exactly three dimensions) and
+    returns a tensor whose last dimension is ``d_model``. The inner width is
+    fixed at ``2 * d_model``.
+    """
+    def __init__(self, d_model, d_state=16):
         super().__init__()
         self.d_model = d_model
+        self.d_state = d_state
+        self.d_inner = d_model * 2
+        # Core projections
+        self.in_proj = nn.Linear(d_model, self.d_inner * 2, bias=False)
+        # Depthwise (groups == channels) conv; kernel 3 with padding 1 keeps
+        # the sequence length and reads one position on each side.
+        self.conv1d = nn.Conv1d(self.d_inner, self.d_inner, 3, groups=self.d_inner, padding=1)
+        self.out_proj = nn.Linear(self.d_inner, d_model, bias=False)
+        # State space parameters
+        self.dt_proj = nn.Linear(self.d_inner, self.d_inner, bias=True)
+        self.B_proj = nn.Linear(self.d_inner, d_state, bias=False)
+        self.C_proj = nn.Linear(self.d_inner, d_state, bias=False)
+        # Initialize A matrix
+        # Every row of A is [1, 2, ..., d_state]; it is stored as log(A).
+        A = torch.arange(1, d_state + 1, dtype=torch.float32)
+        A = A.unsqueeze(0).repeat(self.d_inner, 1)
+        self.A_log = nn.Parameter(torch.log(A))
+        self.D = nn.Parameter(torch.ones(self.d_inner))
+        self.dropout = nn.Dropout(0.1)
+    def forward(self, x):
+        """Run the block on ``x`` of shape (B, L, d_model); returns (B, L, d_model)."""
+        batch_size, seq_len, d_model = x.shape
+        # Dual path
+        xz = self.in_proj(x)  # (B, L, 2*d_inner)
+        x_inner, z = xz.chunk(2, dim=-1)  # Each: (B, L, d_inner)
+        # Convolution
+        x_conv = x_inner.transpose(1, 2)  # (B, d_inner, L)
+        x_conv = self.conv1d(x_conv)  # (B, d_inner, L)
+        x_conv = x_conv.transpose(1, 2)  # (B, L, d_inner)
+        x_conv = F.silu(x_conv)
+        # State space
+        y = self.selective_scan(x_conv)
+        # Gate and output
+        y = y * F.silu(z)
+        output = self.out_proj(y)
+        return self.dropout(output)
+    def selective_scan(self, x):
+        """Simplified selective scan
+
+        Takes ``x`` of shape (B, L, d_inner) and returns the same shape. The
+        recurrence is evaluated one position at a time in a Python loop, and
+        the full (B, L, d_inner, d_state) discretized tensors are built
+        before the loop starts.
+        """
+        batch_size, seq_len, d_inner = x.shape
+        # Get parameters
+        dt = F.softplus(self.dt_proj(x))  # (B, L, d_inner)
+        B = self.B_proj(x)  # (B, L, d_state)
+        C = self.C_proj(x)  # (B, L, d_state)
+        # Discretize A
+        A = -torch.exp(self.A_log)  # (d_inner, d_state)
+        deltaA = torch.exp(dt.unsqueeze(-1) * A.unsqueeze(0).unsqueeze(0))  # (B, L, d_inner, d_state)
+        deltaB = dt.unsqueeze(-1) * B.unsqueeze(2)  # (B, L, d_inner, d_state)
+        # Initialize state
+        h = torch.zeros(batch_size, d_inner, self.d_state, device=x.device)
+        outputs = []
+        # Sequential processing
+        for i in range(seq_len):
+            # h_i = deltaA_i * h_{i-1} + deltaB_i * x_i
+            h = deltaA[:, i] * h + deltaB[:, i] * x[:, i].unsqueeze(-1)
+            # y_i = sum over the state dim of (h_i * C_i), plus the D * x_i skip term
+            y = torch.sum(h * C[:, i].unsqueeze(1), dim=-1) + self.D * x[:, i]
+            outputs.append(y)
+        return torch.stack(outputs, dim=1)
+class CognitiveMomentumEngine(nn.Module):
+    """Add a slowly varying offset, shared by all rows, to 2-D features.
+
+    An exponential moving average of a projected mean feature vector is kept
+    in the ``momentum_state`` buffer and updated on every 2-D forward call.
+    Inputs that are not 2-D are returned unchanged.
+    """
+    def __init__(self, d_model):
         super().__init__()
         self.d_model = d_model
+        # Momentum projections
+        self.momentum_proj = nn.Linear(d_model, d_model)
+        self.force_proj = nn.Linear(d_model, d_model)
+        # Memory
+        self.register_buffer('momentum_state', torch.zeros(d_model))
+        self.decay = 0.95
     def forward(self, x):
         if x.dim() == 2:
+            batch_size = x.size(0)
+            # Global momentum update
+            force = self.force_proj(x.mean(dim=0))
+            new_state = self.decay * self.momentum_state + (1 - self.decay) * force
+            # Persist a detached copy. Keeping autograd history in the buffer
+            # would make the next step's backward() walk into this step's
+            # already-freed graph and raise a RuntimeError.
+            self.momentum_state = new_state.detach()
+            # Apply momentum
+            # Use the attached tensor here so force_proj still receives
+            # gradients for the current step.
+            momentum_effect = self.momentum_proj(new_state).unsqueeze(0).expand(batch_size, -1)
+            return x + momentum_effect * 0.1
+        else:
+            return x
+class AstrocyteLayer(nn.Module):
+    """Blend a per-row "fast" path with a shared, memory-backed "slow" path.
+
+    The slow path keeps an exponential moving average of the projected mean
+    input in the ``slow_memory`` buffer, which is updated on every forward
+    call. A learned sigmoid gate mixes the two paths element-wise.
+    """
+    def __init__(self, d_model):
         super().__init__()
         self.d_model = d_model
+        self.d_astrocyte = d_model
+        # Fast pathway
+        self.fast_proj = nn.Linear(d_model, d_model)
+        self.fast_dropout = nn.Dropout(0.1)
+        # Slow pathway
+        self.slow_proj = nn.Linear(d_model, self.d_astrocyte)
+        self.slow_integrate = nn.Linear(self.d_astrocyte, d_model)
+        self.slow_dropout = nn.Dropout(0.1)
+        # Gating
+        self.gate = nn.Linear(d_model * 2, d_model)
+        # Memory
+        self.register_buffer('slow_memory', torch.zeros(self.d_astrocyte))
+        self.memory_decay = 0.9
+    def forward(self, x):
+        """Return the gated mix of fast and slow paths for ``x``.
+
+        Args:
+            x: 2-D features, or a 3-D tensor whose leading dimension is 1
+                (``squeeze(0)`` only removes a size-1 leading dimension).
+
+        Returns:
+            Tensor with the same 2-D shape as the (squeezed) input.
+        """
+        if x.dim() == 3:
+            x = x.squeeze(0)
+        batch_size = x.size(0)
+        # Fast processing
+        fast_out = self.fast_dropout(F.relu(self.fast_proj(x)))
+        # Slow processing with memory
+        slow_input = self.slow_proj(x.mean(dim=0))
+        new_memory = self.memory_decay * self.slow_memory + (1 - self.memory_decay) * slow_input
+        # Persist a detached copy. Keeping autograd history in the buffer
+        # would make the next step's backward() walk into this step's
+        # already-freed graph and raise a RuntimeError.
+        self.slow_memory = new_memory.detach()
+        # The attached tensor feeds the output so slow_proj still trains.
+        slow_out = self.slow_dropout(F.relu(self.slow_integrate(new_memory)))
+        slow_out = slow_out.unsqueeze(0).expand(batch_size, -1)
+        # Combine
+        combined = torch.cat([fast_out, slow_out], dim=-1)
+        gated = torch.sigmoid(self.gate(combined))
+        return fast_out * gated + slow_out * (1 - gated)
+class RevolutionaryGraphMamba(nn.Module):
+    """Complete revolutionary implementation"""
     def __init__(self, config):
         super().__init__()
         # Input processing
         self.input_proj = nn.Linear(input_dim, d_model)
         self.input_norm = nn.LayerNorm(d_model)
+        self.input_dropout = nn.Dropout(0.2)
+        # Data augmentation
+        self.augmentation = GraphDataAugmentation()
+        # Core components
         self.gcn_layers = nn.ModuleList([
             GCNConv(d_model, d_model) for _ in range(n_layers)
         ])
         self.astrocyte_layers = nn.ModuleList([
             AstrocyteLayer(d_model) for _ in range(n_layers)
         ])
+        self.mamba_blocks = nn.ModuleList([
+            SimpleMambaBlock(d_model) for _ in range(n_layers)
+        ])
+        # Cognitive momentum
+        self.momentum_engine = CognitiveMomentumEngine(d_model)
+        # Layer processing
+        self.layer_norms = nn.ModuleList([
             nn.LayerNorm(d_model) for _ in range(n_layers)
         ])
+        self.layer_dropouts = nn.ModuleList([
+            nn.Dropout(0.1) for _ in range(n_layers)
+        ])
+        # Fusion
+        self.fusion_weights = nn.Parameter(torch.tensor([0.4, 0.3, 0.3]))
+        self.fusion_proj = nn.Linear(d_model * 3, d_model)
+        # Output
+        self.output_proj = nn.Linear(d_model, d_model)
+        self.output_dropout = nn.Dropout(0.2)
         self.classifier = None
+        # Initialize weights
+        self.apply(self._init_weights)
+    def _init_weights(self, module):
+        if isinstance(module, nn.Linear):
+            torch.nn.init.xavier_uniform_(module.weight)
+            if module.bias is not None:
+                torch.nn.init.zeros_(module.bias)
+        elif isinstance(module, nn.LayerNorm):
+            torch.nn.init.ones_(module.weight)
+            torch.nn.init.zeros_(module.bias)
     def forward(self, x, edge_index, batch=None):
+        # Apply data augmentation during training
+        if self.training:
+            x = self.augmentation.augment_features(x)
+            edge_index = self.augmentation.augment_edges(edge_index)
         # Input processing
+        h = self.input_dropout(self.input_norm(self.input_proj(x)))
+        # Apply cognitive momentum
+        h = self.momentum_engine(h)
+        # Multi-path processing
         for i in range(len(self.gcn_layers)):
             gcn = self.gcn_layers[i]
+            astrocyte = self.astrocyte_layers[i]
+            mamba = self.mamba_blocks[i]
+            norm = self.layer_norms[i]
+            dropout = self.layer_dropouts[i]
+            # Path 1: GCN (structural)
+            h_gcn = F.relu(gcn(h, edge_index))
+            # Path 2: Astrocyte (temporal)
+            h_astrocyte = astrocyte(h)
+            # Path 3: Mamba (sequential)
+            h_mamba = mamba(h.unsqueeze(0)).squeeze(0)
+            # Fusion
+            h_paths = torch.stack([h_gcn, h_astrocyte, h_mamba], dim=-1)  # (nodes, d_model, 3)
+            weights = F.softmax(self.fusion_weights, dim=0)  # (3,)
+            h_fused = torch.sum(h_paths * weights, dim=-1)  # (nodes, d_model)
+            # Residual connection
+            h = dropout(norm(h + h_fused))
+        # Output processing
+        h = self.output_dropout(self.output_proj(h))
         return h
     def _init_classifier(self, num_classes, device):
         if self.classifier is None:
             self.classifier = nn.Sequential(
+                nn.Dropout(0.3),
                 nn.Linear(self.config['model']['d_model'], num_classes)
             ).to(device)
+        return self.classifier
     def get_performance_stats(self):
         total_params = sum(p.numel() for p in self.parameters())
+        trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
         return {
             'total_params': total_params,
+            'trainable_params': trainable_params,
             'device': next(self.parameters()).device,
             'dtype': next(self.parameters()).dtype,
             'model_size': f"{total_params/1000:.1f}K parameters"
         }
+class SimpleGraphMamba(nn.Module):
+    """GCN backbone with a per-layer MLP enhancement and residual norm.
+
+    Reads ``config['model']['d_model']``, ``config['model']['n_layers']`` and
+    the optional top-level ``config['input_dim']`` (default 1433). The
+    classifier head is created lazily by ``_init_classifier``.
+    """
     def __init__(self, config):
         super().__init__()
         self.config = config
+        d_model = config['model']['d_model']
+        n_layers = config['model']['n_layers']
+        input_dim = config.get('input_dim', 1433)
+        # Simple architecture
+        self.input_proj = nn.Linear(input_dim, d_model)
+        self.input_norm = nn.LayerNorm(d_model)
+        # GCN backbone
+        self.gcn_layers = nn.ModuleList([
+            GCNConv(d_model, d_model) for _ in range(n_layers)
+        ])
+        # Enhanced features
+        self.enhancements = nn.ModuleList([
+            nn.Sequential(
+                nn.Linear(d_model, d_model * 2),
+                nn.ReLU(),
+                nn.Dropout(0.1),
+                nn.Linear(d_model * 2, d_model)
+            ) for _ in range(n_layers)
+        ])
+        self.layer_norms = nn.ModuleList([
+            nn.LayerNorm(d_model) for _ in range(n_layers)
+        ])
+        self.dropout = nn.Dropout(0.2)
         self.classifier = None
     def forward(self, x, edge_index, batch=None):
+        # ``batch`` is accepted for interface compatibility and is not used.
+        h = self.input_norm(self.input_proj(x))
+        for gcn, enhance, norm in zip(self.gcn_layers, self.enhancements, self.layer_norms):
+            # GCN processing
+            h_gcn = F.relu(gcn(h, edge_index))
+            # Enhancement
+            h_enhanced = enhance(h_gcn)
+            # Residual + norm
+            h = norm(h + h_enhanced)
+            h = self.dropout(h)
+        return h
     def _init_classifier(self, num_classes, device):
+        # Build the head only once; later calls return the existing module
+        # even if ``num_classes`` differs.
+        if self.classifier is None:
+            self.classifier = nn.Sequential(
+                nn.Dropout(0.3),
+                nn.Linear(self.config['model']['d_model'], num_classes)
+            ).to(device)
         return self.classifier
     def get_performance_stats(self):
+        # Return the same keys as the sibling model's stats, so code written
+        # against either alias can read 'dtype' and 'trainable_params'.
+        total_params = sum(p.numel() for p in self.parameters())
+        trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
+        first_param = next(self.parameters())
+        return {
+            'total_params': total_params,
+            'trainable_params': trainable_params,
+            'device': first_param.device,
+            'dtype': first_param.dtype,
+            'model_size': f"{total_params/1000:.1f}K parameters"
+        }
 def create_astrocyte_config():
+    """Return the default configuration dict.
+
+    The dict has 'model', 'data', 'training' and 'ordering' sections plus a
+    top-level 'input_dim'. A new dict is built on every call, so callers may
+    mutate the result freely.
+
+    NOTE(review): of the 'model' keys, SimpleGraphMamba reads only 'd_model'
+    and 'n_layers'; whether any other consumer reads 'd_state', 'd_conv',
+    'expand' or 'dropout' cannot be confirmed from this file.
+    """
     return {
         'model': {
+            'd_model': 64,      # Reduced to prevent overfitting
             'd_state': 8,
             'd_conv': 4,
             'expand': 2,
+            'n_layers': 2,      # Reduced layers
+            'dropout': 0.2
         },
         'data': {
             'batch_size': 1,
             'test_split': 0.2
         },
         'training': {
+            'learning_rate': 0.01,
+            'weight_decay': 0.005,
+            'epochs': 200,
+            'patience': 30,
+            'warmup_epochs': 10,
+            'min_lr': 1e-5,
             'label_smoothing': 0.0,
+            'max_gap': 0.15
+        },
+        'ordering': {
+            'strategy': 'none',
+            'preserve_locality': True
+        },
+        'input_dim': 1433
+    }
+def create_regularized_config():
+    """Heavily regularized config for small datasets"""
+    return {
+        'model': {
+            'd_model': 32,      # Very small
+            'd_state': 4,
+            'd_conv': 4,
+            'expand': 2,
+            'n_layers': 2,
+            'dropout': 0.3
+        },
+        'data': {
+            'batch_size': 1,
+            'test_split': 0.2
+        },
+        'training': {
+            'learning_rate': 0.005,
+            'weight_decay': 0.01,
+            'epochs': 150,
+            'patience': 20,
+            'warmup_epochs': 5,
+            'min_lr': 1e-6,
+            'label_smoothing': 0.1,
+            'max_gap': 0.1
         },
         'ordering': {
             'strategy': 'none',
         'input_dim': 1433
     }
+# Model aliases
+# The four public names are kept so existing imports keep resolving; two
+# point at the full model and two at the simpler GCN-based fallback.
+# NOTE(review): the alias targets are different classes, so the choice of
+# alias now changes which architecture is built -- confirm this is intended.
+GraphMamba = RevolutionaryGraphMamba
+AstrocyteGraphMamba = RevolutionaryGraphMamba
+HybridGraphMamba = SimpleGraphMamba  # Fallback to simple version
+QuantumEnhancedGraphMamba = SimpleGraphMamba