Update app.py
app.py CHANGED
@@ -1,296 +1,148 @@
import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torch_geometric.utils import degree, to_dense_adj
-from torch_geometric.nn import GCNConv
-import networkx as nx
import logging

logger = logging.getLogger(__name__)

-# … (removed lines not rendered in the diff view)
-        A = torch.arange(1, d_state + 1, dtype=torch.float32).unsqueeze(0).repeat(self.d_inner, 1)
-        self.A_log = nn.Parameter(torch.log(A))
-        self.D = nn.Parameter(torch.ones(self.d_inner))
-        self.out_proj = nn.Linear(self.d_inner, d_model, bias=False)
-        self.dropout = nn.Dropout(0.3)
-
-    def forward(self, x):
-        batch, length, d_model = x.shape
-        xz = self.in_proj(x)
-        x, z = xz.chunk(2, dim=-1)
-
-        x = x.transpose(1, 2)
-        x = self.conv1d(x)[:, :, :length]
-        x = x.transpose(1, 2)
-        x = self.act(x)
-        x = self.dropout(x)
-
-        y = self.selective_scan(x)
-        y = y * self.act(z)
-        return self.dropout(self.out_proj(y))

-# … (removed lines not rendered in the diff view)
-    def __init__(self, d_model):
-        super().__init__()
-        self.adjacency_proj = nn.Linear(1, d_model)
-        self.structure_attention = nn.MultiheadAttention(d_model, num_heads=4, batch_first=True)
-        self.norm = nn.LayerNorm(d_model)
-
-    def forward(self, x, edge_index):
-        # Create adjacency features
-        adj = to_dense_adj(edge_index, max_num_nodes=x.size(0)).squeeze(0)
-
-        # Add self-connections and normalize
-        adj = adj + torch.eye(adj.size(0), device=adj.device)
-        deg = adj.sum(dim=1, keepdim=True)
-        adj_norm = adj / (deg + 1e-8)
-
-        # Project adjacency to feature space
-        adj_features = self.adjacency_proj(adj_norm.unsqueeze(-1))
-
-        # Attention over structure
-        x_with_structure = x.unsqueeze(0)  # Add batch dim
-        adj_features = adj_features.unsqueeze(0)
-
-        attended, _ = self.structure_attention(x_with_structure, adj_features, adj_features)
-
-        return self.norm(x + attended.squeeze(0))
-
-
-class SpectralOrdering:
-    """Spectral graph ordering to preserve structure"""
-    @staticmethod
-    def compute_ordering(edge_index, num_nodes):
-        try:
-            # Create adjacency matrix
-            adj = to_dense_adj(edge_index, max_num_nodes=num_nodes).squeeze(0)

-# … (removed lines not rendered in the diff view)
-        input_dim = config.get('input_dim', 1433)
-
-        # Input processing
-        self.input_proj = nn.Linear(input_dim, d_model)
-        self.input_dropout = nn.Dropout(0.5)
-
-        # Graph structure encoding
-        self.structure_encoder = GraphStructureEncoder(d_model)
-
-        # Positional encoding
-        self.pos_encoding = nn.Embedding(5000, d_model)
-        self.degree_encoding = nn.Embedding(100, d_model)
-
-        # Mamba layers
-        self.mamba_layers = nn.ModuleList([
-            MambaBlock(d_model, d_state=4) for _ in range(n_layers)
-        ])
-        self.layer_norms = nn.ModuleList([
-            nn.LayerNorm(d_model) for _ in range(n_layers)
-        ])
-
-        self.hidden_dropout = nn.Dropout(0.5)
-        self.output_proj = nn.Linear(d_model, d_model)
-
-        # Classifier
-        self.classifier = None
-
-    def _get_ordering(self, edge_index, num_nodes):
-        """Get node ordering based on strategy"""
-        strategy = self.config['ordering']['strategy']
-
-        if strategy == 'spectral':
-            return SpectralOrdering.compute_ordering(edge_index, num_nodes)
-        elif strategy == 'degree':
-            degrees = degree(edge_index[0], num_nodes)
-            return torch.argsort(degrees, descending=True)
-        else:  # bfs
-            return torch.arange(num_nodes)
-
-    def forward(self, x, edge_index, batch=None):
-        # Input projection
-        h = self.input_dropout(self.input_proj(x))
-
-        # Add structural information
-        h = self.structure_encoder(h, edge_index)
-
-        # Add positional encodings
-        positions = torch.arange(h.size(0), device=h.device).clamp(max=4999)
-        degrees = degree(edge_index[0], h.size(0)).long().clamp(max=99)
-
-        h = h + self.pos_encoding(positions) + self.degree_encoding(degrees)
-
-        # Get ordering
-        order = self._get_ordering(edge_index, h.size(0))
-        h_ordered = h[order].unsqueeze(0)
-
-        # Process through Mamba layers
-        for mamba, ln in zip(self.mamba_layers, self.layer_norms):
-            residual = h_ordered
-            h_ordered = ln(h_ordered)
-            h_ordered = residual + mamba(h_ordered)
-            h_ordered = self.hidden_dropout(h_ordered)
-
-        # Restore order
-        h_restored = torch.zeros_like(h_ordered.squeeze(0))
-        h_restored[order] = h_ordered.squeeze(0)
-
-        return self.output_proj(h_restored)
-
-    def _init_classifier(self, num_classes, device):
-        if self.classifier is None:
-            self.classifier = nn.Sequential(
-                nn.Dropout(0.5),
-                nn.Linear(self.config['model']['d_model'], num_classes)
-            ).to(device)
-
-    def get_performance_stats(self):
-        total_params = sum(p.numel() for p in self.parameters())
-        return {
-            'total_params': total_params,
-            'device': next(self.parameters()).device,
-            'dtype': next(self.parameters()).dtype,
-            'model_size': f"{total_params/1000:.1f}K parameters"
-        }
-

-# … (removed lines not rendered in the diff view)
-        d_model = config['model']['d_model']
-        input_dim = config.get('input_dim', 1433)
-
-        # Mamba branch
-        self.mamba = GraphMamba(config)
-
-        # GCN branch (single layer)
-        self.gcn = GCNConv(input_dim, d_model)
-
-        # Fusion
-        self.fusion = nn.Sequential(
-            nn.Linear(d_model * 2, d_model),
-            nn.ReLU(),
-            nn.Dropout(0.3),
-            nn.Linear(d_model, d_model)
-        )
-
-        self.classifier = None
-        self.config = config
-
-    def forward(self, x, edge_index, batch=None):
-        # Mamba branch
-        mamba_out = self.mamba(x, edge_index, batch)
-
-        # GCN branch
-        gcn_out = F.dropout(F.relu(self.gcn(x, edge_index)), 0.5, training=self.training)
-
-        # Fuse
-        combined = torch.cat([mamba_out, gcn_out], dim=-1)
-        return self.fusion(combined)

-# … (removed lines not rendered in the diff view)
-        return self.mamba.get_performance_stats()
-
-
-def create_regularized_config():
-    """Optimized config with structure preservation"""
-    return {
-        'model': {
-            'd_model': 64,
-            'd_state': 4,
-            'd_conv': 4,
-            'expand': 2,
-            'n_layers': 2,
-            'dropout': 0.5
-        },
-        'data': {
-            'batch_size': 1,
-            'test_split': 0.2
-        },
-        'training': {
-            'learning_rate': 0.001,  # Slightly higher
-            'weight_decay': 0.01,
-            'epochs': 200,
-            'patience': 15,
-            'warmup_epochs': 10,
-            'min_lr': 1e-6
-        },
-        'ordering': {
-            'strategy': 'spectral',  # Changed from bfs
-            'preserve_locality': True
-        },
-        'input_dim': 1433
-    }
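The body of SpectralOrdering.compute_ordering is not rendered in the diff above, only its signature. For orientation, here is a minimal sketch of what a Fiedler-vector ordering of this kind usually looks like; the helper name spectral_ordering, the normalized-Laplacian choice, and the dense eigendecomposition are assumptions, not the removed implementation:

```python
# Hypothetical sketch (not the removed code): order nodes by the Fiedler vector
# of the normalized graph Laplacian, assuming a dense adjacency fits in memory.
import torch
from torch_geometric.utils import to_dense_adj

def spectral_ordering(edge_index, num_nodes):
    try:
        # Dense adjacency with self-loops
        adj = to_dense_adj(edge_index, max_num_nodes=num_nodes).squeeze(0)
        adj = adj + torch.eye(num_nodes, device=adj.device)

        # Symmetrically normalized Laplacian: L = I - D^{-1/2} A D^{-1/2}
        deg = adj.sum(dim=1)
        d_inv_sqrt = deg.clamp(min=1e-8).pow(-0.5)
        lap = torch.eye(num_nodes, device=adj.device) - d_inv_sqrt[:, None] * adj * d_inv_sqrt[None, :]

        # Fiedler vector = eigenvector of the second-smallest eigenvalue;
        # sorting by it keeps strongly connected nodes close in the sequence
        eigvals, eigvecs = torch.linalg.eigh(lap)
        fiedler = eigvecs[:, 1]
        return torch.argsort(fiedler)
    except Exception:
        # Fall back to the identity ordering if the eigensolve fails
        return torch.arange(num_nodes)
```

Ordering nodes this way keeps graph-adjacent nodes near each other in the scan sequence, which is what the removed config's 'strategy': 'spectral' setting (changed from 'bfs') selects.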
+#!/usr/bin/env python3
+"""
+Enhanced Mamba Graph with structure preservation and interface fix
+"""
+
+import os
+os.environ['OMP_NUM_THREADS'] = '4'
+
import torch
+import time
import logging
+import threading
+import signal
+from core.graph_mamba import GraphMamba, HybridGraphMamba, create_regularized_config
+from core.trainer import GraphMambaTrainer
+from data.loader import GraphDataLoader
+from utils.visualization import GraphVisualizer

+logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

+def get_device():
+    if torch.cuda.is_available():
+        device = torch.device('cuda')
+        logger.info(f"🚀 CUDA available - using GPU: {torch.cuda.get_device_name()}")
+    else:
+        device = torch.device('cpu')
+        logger.info("💻 Using CPU")
+    return device
+
+def run_comprehensive_test():
+    """Enhanced test with structure preservation"""
+    print("🧠 Enhanced Mamba Graph Neural Network")
+    print("=" * 60)

+    config = create_regularized_config()
+    device = get_device()
+
+    try:
+        # Data loading
+        print("\n📊 Loading Cora dataset...")
+        data_loader = GraphDataLoader()
+        dataset = data_loader.load_node_classification_data('Cora')
+        data = dataset[0].to(device)
+        info = data_loader.get_dataset_info(dataset)
+
+        print(f"✅ Dataset loaded: {data.num_nodes} nodes, {data.num_edges} edges")
+
+        # Test both models
+        models_to_test = [
+            ("Enhanced GraphMamba", GraphMamba),
+            ("Hybrid GraphMamba", HybridGraphMamba)
+        ]
+
+        results = {}
+
+        for model_name, model_class in models_to_test:
+            print(f"\n🏗️ Testing {model_name}...")

+            model = model_class(config).to(device)
+            total_params = sum(p.numel() for p in model.parameters())
+            train_samples = data.train_mask.sum().item()
+
+            print(f" Parameters: {total_params:,} ({total_params/train_samples:.1f} per sample)")

+            # Training
+            trainer = GraphMambaTrainer(model, config, device)
+            print(f" Strategy: {config['ordering']['strategy']}")

+            start_time = time.time()
+            history = trainer.train_node_classification(data, verbose=False)
+            training_time = time.time() - start_time

+            # Evaluation
+            test_metrics = trainer.test(data)

+            results[model_name] = {
+                'test_acc': test_metrics['test_acc'],
+                'val_acc': trainer.best_val_acc,
+                'gap': trainer.best_gap,
+                'params': total_params,
+                'time': training_time
+            }
+
+            print(f" ✅ Test Accuracy: {test_metrics['test_acc']:.4f} ({test_metrics['test_acc']*100:.2f}%)")
+            print(f" 📊 Validation: {trainer.best_val_acc:.4f}")
+            print(f" 🎯 Gap: {trainer.best_gap:.4f}")
+            print(f" ⏱️ Time: {training_time:.1f}s")
+
+        # Comparison
+        print(f"\n📈 Model Comparison:")
+        print(f"{'Model':<20} {'Test Acc':<10} {'Val Acc':<10} {'Gap':<8} {'Params':<8}")
+        print("-" * 60)
+
+        for name, result in results.items():
+            print(f"{name:<20} {result['test_acc']:.4f} {result['val_acc']:.4f} "
+                  f"{result['gap']:>6.3f} {result['params']/1000:.0f}K")
+
+        # Best model
+        best_model = max(results.items(), key=lambda x: x[1]['test_acc'])
+        print(f"\n🏆 Best: {best_model[0]} - {best_model[1]['test_acc']*100:.2f}% accuracy")
+
+        # Baseline comparison
+        baselines = {'Random': 0.143, 'GCN': 0.815, 'GAT': 0.830}
+        best_acc = best_model[1]['test_acc']
+
+        print(f"\n📊 vs Baselines:")
+        for baseline, acc in baselines.items():
+            diff = best_acc - acc
+            status = "🟢" if diff > 0 else "🔴"
+            print(f" {status} {baseline}: {acc:.3f} (diff: {diff:+.3f})")
+
+        print(f"\n✨ Testing complete! Process staying alive for interface...")
+
+    except Exception as e:
+        print(f"❌ Error: {e}")
+        print("Process staying alive despite error...")

+def keep_alive():
+    """Keep process running for interface"""
+    try:
+        while True:
+            time.sleep(60)
+    except KeyboardInterrupt:
+        print("\n👋 Shutting down gracefully...")

+def run_background():
+    """Run test in background thread"""
+    try:
+        run_comprehensive_test()
+    except Exception as e:
+        print(f"Background test error: {e}")
+    finally:
+        print("Background test complete, keeping alive...")

+if __name__ == "__main__":
+    # Start test in background thread
+    test_thread = threading.Thread(target=run_background, daemon=True)
+    test_thread.start()

+    # Keep main thread alive for interface
+    try:
+        keep_alive()
+    except KeyboardInterrupt:
+        print("\nExiting...")
+    except Exception as e:
+        print(f"Main thread error: {e}")
+        keep_alive()  # Still try to keep alive
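The rewritten app.py imports GraphMambaTrainer and GraphDataLoader from modules that are not part of this commit. The stubs below only mirror how the script calls them; every attribute, method, and return key is inferred from the usage above and is hypothetical, not the project's actual trainer or loader:

```python
# Hypothetical stubs inferred from the calls in app.py; the real core.trainer
# and data.loader modules are not shown in this commit.
class GraphMambaTrainer:
    def __init__(self, model, config, device):
        self.model, self.config, self.device = model, config, device
        self.best_val_acc = 0.0  # read by app.py after training
        self.best_gap = 0.0      # train/validation gap, also read by app.py

    def train_node_classification(self, data, verbose=True):
        # Expected to fit on data.train_mask / data.val_mask and return a history dict
        return {}

    def test(self, data):
        # app.py indexes the returned dict with 'test_acc'
        return {'test_acc': 0.0}

class GraphDataLoader:
    def load_node_classification_data(self, name):
        # Expected to return a PyG-style dataset whose first element carries
        # num_nodes, num_edges and the train/val/test masks used above
        raise NotImplementedError

    def get_dataset_info(self, dataset):
        return {}
```

The __main__ block runs the benchmark in a daemon thread while the main thread sleeps in keep_alive(), so whatever hosting interface this Space exposes stays responsive even if the test errors out.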