Spaces:

alidenewade
/

drug-discovery-app

Sleeping

App Files Files Community

alidenewade committed on Jun 9

Commit

b6dbfa3

verified ·

1 Parent(s): a07fa4c

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -27

app.py CHANGED Viewed

@@ -200,39 +200,83 @@ def mannwhitney_test(df, descriptor): #
 # === STEP 2: FEATURE ENGINEERING FUNCTIONS ===
 # ==============================================================================
-def calculate_fingerprints(current_state, fingerprint_type, progress=gr.Progress()): #
-    input_df = current_state.get('cleaned_data') #
-    if input_df is None or input_df.empty: raise gr.Error("No cleaned data found. Please complete Step 1.") #
-    if not fingerprint_type: raise gr.Error("Please select a fingerprint type.") #
-    progress(0, desc="Starting..."); yield f"🧪 Starting fingerprint calculation...", None, gr.update(visible=False), None, current_state #
-    try: #
-        smi_file, output_csv = 'molecule.smi', 'fingerprints.csv' #
-        input_df[['canonical_smiles', 'canonical_smiles']].to_csv(smi_file, sep='\t', index=False, header=False) #
-        if os.path.exists(output_csv): os.remove(output_csv) #
-        descriptortypes = fp_config.get(fingerprint_type) #
-        if not descriptortypes: raise gr.Error(f"Descriptor XML for '{fingerprint_type}' not found.") #
-        progress(0.3, desc="⚗️ Running PaDEL..."); yield f"⚗️ Running PaDEL...", None, gr.update(visible=False), None, current_state #
-        padeldescriptor(mol_dir=smi_file, d_file=output_csv, descriptortypes=descriptortypes, detectaromaticity=True, standardizenitro=True, standardizetautomers=True, threads=-1, removesalt=True, log=False, fingerprints=True) #
-        if not os.path.exists(output_csv) or os.path.getsize(output_csv) == 0: #
-            raise gr.Error("PaDEL failed to produce an output file. Check molecule validity.") #
-        progress(0.7, desc="📊 Processing results..."); yield "📊 Processing results...", None, gr.update(visible=False), None, current_state #
-        df_X = pd.read_csv(output_csv).rename(columns={'Name': 'canonical_smiles'}) #
-        final_df = pd.merge(input_df[['canonical_smiles', 'pIC50']], df_X, on='canonical_smiles', how='inner') #
-        current_state['fingerprint_data'] = final_df; current_state['fingerprint_type'] = fingerprint_type #
-        progress(0.9, desc="🖼️ Generating molecule grid...") #
-        mols_html = mols2grid.display(final_df, smiles_col='canonical_smiles', subset=['img', 'pIC50'], rename={"pIC50": "pIC50"}, transform={"pIC50": lambda x: f"{x:.2f}"})._repr_html_() #
-        success_msg = f"✅ Success! Generated {len(df_X.columns) -1} descriptors for {len(final_df)} molecules." #
-        progress(1, desc="Completed!"); yield success_msg, final_df, gr.update(visible=True), gr.update(value=mols_html, visible=True), current_state #
-    except Exception as e: raise gr.Error(f"Calculation failed: {e}") #
-    finally: #
-        if os.path.exists('molecule.smi'): os.remove('molecule.smi') #
-        if os.path.exists('fingerprints.csv'): os.remove('fingerprints.csv') #
 # ==============================================================================
 # === STEP 3: MODEL TRAINING & PREDICTION FUNCTIONS ===

 # === STEP 2: FEATURE ENGINEERING FUNCTIONS ===
 # ==============================================================================
+def calculate_fingerprints(current_state, fingerprint_type, progress=gr.Progress()):
+    input_df = current_state.get('cleaned_data')
+    if input_df is None or input_df.empty:
+        raise gr.Error("No cleaned data found. Please complete Step 1.")
+    if not fingerprint_type:
+        raise gr.Error("Please select a fingerprint type.")
+    progress(0, desc="Starting...")
+    yield f"🧪 Starting fingerprint calculation...", None, gr.update(visible=False), None, current_state
+    try:
+        smi_file, output_csv = 'molecule.smi', 'fingerprints.csv'
+        input_df[['canonical_smiles', 'canonical_smiles']].to_csv(smi_file, sep='\t', index=False, header=False)
+        if os.path.exists(output_csv):
+            os.remove(output_csv)
+        descriptortypes = fp_config.get(fingerprint_type)
+        if not descriptortypes:
+            raise gr.Error(f"Descriptor XML for '{fingerprint_type}' not found.")
+        progress(0.3, desc="⚗️ Running PaDEL...")
+        yield f"⚗️ Running PaDEL...", None, gr.update(visible=False), None, current_state
+        padeldescriptor(mol_dir=smi_file, d_file=output_csv, descriptortypes=descriptortypes, detectaromaticity=True, standardizenitro=True, standardizetautomers=True, threads=-1, removesalt=True, log=False, fingerprints=True)
+        if not os.path.exists(output_csv) or os.path.getsize(output_csv) == 0:
+            raise gr.Error("PaDEL failed to produce an output file. Check molecule validity.")
+        progress(0.7, desc="📊 Processing results...")
+        yield "📊 Processing results...", None, gr.update(visible=False), None, current_state
+        df_X = pd.read_csv(output_csv).rename(columns={'Name': 'canonical_smiles'})
+        final_df = pd.merge(input_df[['canonical_smiles', 'pIC50']], df_X, on='canonical_smiles', how='inner')
+        current_state['fingerprint_data'] = final_df
+        current_state['fingerprint_type'] = fingerprint_type
+        progress(0.9, desc="🖼️ Generating molecule grid...")
+        # Test mols2grid functionality
+        print(f"Final dataframe shape: {final_df.shape}")
+        print(f"Columns: {final_df.columns.tolist()}")
+        print(f"First few SMILES: {final_df['canonical_smiles'].head().tolist()}")
+        # Test if RDKit can create molecules
+        from rdkit import Chem
+        test_mol = Chem.MolFromSmiles(final_df['canonical_smiles'].iloc[0])
+        print(f"Test molecule created: {test_mol is not None}")
+        # Create mols2grid with error handling
+        try:
+            mols_html = mols2grid.display(
+                final_df,
+                smiles_col='canonical_smiles',
+                subset=['img', 'pIC50'],
+                rename={"pIC50": "pIC50"},
+                transform={"pIC50": lambda x: f"{x:.2f}"}
+            )._repr_html_()
+            print("Mols2grid HTML created successfully")
+        except Exception as grid_error:
+            print(f"Mols2grid error: {grid_error}")
+            mols_html = f"<p>Error creating molecule grid: {str(grid_error)}</p>"
+        success_msg = f"✅ Success! Generated {len(df_X.columns) -1} descriptors for {len(final_df)} molecules."
+        progress(1, desc="Completed!")
+        yield success_msg, final_df, gr.update(visible=True), gr.update(value=mols_html, visible=True), current_state
+    except Exception as e:
+        print(f"Full error: {e}")
+        raise gr.Error(f"Calculation failed: {e}")
+    finally:
+        if os.path.exists('molecule.smi'):
+            os.remove('molecule.smi')
+        if os.path.exists('fingerprints.csv'):
+            os.remove('fingerprints.csv')
 # ==============================================================================
 # === STEP 3: MODEL TRAINING & PREDICTION FUNCTIONS ===