shengyongniu
/

dawo

@@ -2,8 +2,10 @@ import torch
 import numpy as np
 import pandas as pd
 import json
 from dawo_wrapper import DAWOWrapper
 print("DAWO Model Example: Drug Response Prediction")
 print("============================================")
@@ -15,7 +17,7 @@ model = DAWOWrapper(repo_path="./")
 # Load data files from the data folder
 print("\n2. Loading drug and cell line features...")
-# Set data directory (use local data directory)
 data_dir = "./data"
 # Drug feature components
@@ -43,8 +45,8 @@ print(f"     Shape: {cell_features.shape}")
 # Select sample drug and cell line
 print("\n3. Preparing inputs for prediction:")
-# Select a drug for demonstration
-sample_drug = list(drug_name.keys())[0]
 print(f"   - Selected drug: {sample_drug}")
 # Create complete drug feature vector by concatenating the three embedding types
@@ -58,8 +60,8 @@ drug_feature = np.concatenate((
 drug_features = torch.tensor(drug_feature, dtype=torch.float32).unsqueeze(0)  # Add batch dimension
 print(f"     Combined drug feature shape: {drug_features.shape}")
-# Select a cell line for demonstration
-sample_cell = cell_features.index[0]
 print(f"   - Selected cell line: {sample_cell}")
 # Create cell line feature vector
@@ -75,12 +77,31 @@ padded_features[0, :cell_features_tensor.shape[1]] = cell_features_tensor
 cell_features_tensor = padded_features
 print(f"     Padded cell feature shape: {cell_features_tensor.shape}")
-# Create simulated gene expression data (normally this would be real data)
-print("   - Creating sample gene expression data...")
-gene_expression = torch.randn(1, 5000)  # 1 sample, 5000 genes
 print(f"     Gene expression shape: {gene_expression.shape}")
-# Run prediction with prepared data
 print("\n4. Running prediction with DAWO model...")
 results = model.predict(gene_expression, drug_features, cell_features_tensor)

 import numpy as np
 import pandas as pd
 import json
+import scanpy as sc
 from dawo_wrapper import DAWOWrapper
+from dawo import Anndata_to_Tensor
 print("DAWO Model Example: Drug Response Prediction")
 print("============================================")
 # Load data files from the data folder
 print("\n2. Loading drug and cell line features...")
+# Set data directory
 data_dir = "./data"
 # Drug feature components
 # Select sample drug and cell line
 print("\n3. Preparing inputs for prediction:")
+# Select a drug for demonstration - use Dabrafenib
+sample_drug = "Dabrafenib"
 print(f"   - Selected drug: {sample_drug}")
 # Create complete drug feature vector by concatenating the three embedding types
 drug_features = torch.tensor(drug_feature, dtype=torch.float32).unsqueeze(0)  # Add batch dimension
 print(f"     Combined drug feature shape: {drug_features.shape}")
+# Select the MIA PaCa-2 cell line
+sample_cell = "MIA PaCa-2"
 print(f"   - Selected cell line: {sample_cell}")
 # Create cell line feature vector
 cell_features_tensor = padded_features
 print(f"     Padded cell feature shape: {cell_features_tensor.shape}")
+# Load gene expression data
+print("\n   - Loading real gene expression data for Dabrafenib on MIA_PaCa-2 cell line...")
+adata = sc.read_h5ad(f'{data_dir}/Dabrafenib.MIA_PaCa-2.h5ad')
+print(f"     AnnData object shape: {adata.shape}")
+# Preprocess gene expression: reduce the matrix to at most 5000 gene columns.
+# NOTE(review): 5000 mirrors n_top_genes below - confirm it is the width the model expects.
+print("   - Preprocessing gene expression data...")
+if 'highly_variable' not in adata.var:
+    print("     Selecting top 5000 highly variable genes...")
+    sc.pp.highly_variable_genes(adata, n_top_genes=5000)
+    adata = adata[:, adata.var.highly_variable]
+else:
+    print("     Using pre-identified highly variable genes...")
+    if adata.shape[1] > 5000:
+        print("     Subsetting to 5000 genes...")
+        # Honor the existing flag instead of blindly taking the first 5000
+        # columns: a stable argsort on the negated mask ranks flagged genes
+        # first, the first 5000 ranks are kept, and the kept indices are
+        # re-sorted so the original column order is preserved. When fewer
+        # than 5000 genes are flagged, the leading unflagged genes fill the
+        # remainder so the column count stays at 5000. When every gene is
+        # flagged this is identical to slicing 0:5000.
+        hvg_mask = adata.var['highly_variable'].to_numpy().astype(bool)
+        keep_idx = np.sort(np.argsort(~hvg_mask, kind='stable')[:5000])
+        adata = adata[:, keep_idx]
+# Convert the AnnData object to a tensor with the project helper Anndata_to_Tensor
+print("   - Converting gene expression to tensor...")
+gene_expression = Anndata_to_Tensor(adata)
+if len(gene_expression.shape) == 1:
+    gene_expression = gene_expression.unsqueeze(0)  # 1-D result: add a leading batch dimension
 print(f"     Gene expression shape: {gene_expression.shape}")
+# Run prediction
 print("\n4. Running prediction with DAWO model...")
 results = model.predict(gene_expression, drug_features, cell_features_tensor)

requirements.txt CHANGED Viewed

@@ -2,4 +2,6 @@ torch>=1.10.0
 numpy>=1.20.0
 pandas>=1.3.0
 scipy>=1.7.0
-pyarrow>=7.0.0

 numpy>=1.20.0
 pandas>=1.3.0
 scipy>=1.7.0
+pyarrow>=7.0.0
+scanpy>=1.9.0
+anndata>=0.8.0