alidenewade committed on
Commit
b6dbfa3
·
verified ·
1 Parent(s): a07fa4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -27
app.py CHANGED
@@ -200,39 +200,83 @@ def mannwhitney_test(df, descriptor): #
200
  # === STEP 2: FEATURE ENGINEERING FUNCTIONS ===
201
  # ==============================================================================
202
 
203
- def calculate_fingerprints(current_state, fingerprint_type, progress=gr.Progress()): #
204
- input_df = current_state.get('cleaned_data') #
205
- if input_df is None or input_df.empty: raise gr.Error("No cleaned data found. Please complete Step 1.") #
206
- if not fingerprint_type: raise gr.Error("Please select a fingerprint type.") #
207
- progress(0, desc="Starting..."); yield f"πŸ§ͺ Starting fingerprint calculation...", None, gr.update(visible=False), None, current_state #
208
- try: #
209
- smi_file, output_csv = 'molecule.smi', 'fingerprints.csv' #
 
 
 
 
 
210
 
211
- input_df[['canonical_smiles', 'canonical_smiles']].to_csv(smi_file, sep='\t', index=False, header=False) #
212
 
213
- if os.path.exists(output_csv): os.remove(output_csv) #
214
- descriptortypes = fp_config.get(fingerprint_type) #
215
- if not descriptortypes: raise gr.Error(f"Descriptor XML for '{fingerprint_type}' not found.") #
 
 
216
 
217
- progress(0.3, desc="βš—οΈ Running PaDEL..."); yield f"βš—οΈ Running PaDEL...", None, gr.update(visible=False), None, current_state #
218
- padeldescriptor(mol_dir=smi_file, d_file=output_csv, descriptortypes=descriptortypes, detectaromaticity=True, standardizenitro=True, standardizetautomers=True, threads=-1, removesalt=True, log=False, fingerprints=True) #
219
- if not os.path.exists(output_csv) or os.path.getsize(output_csv) == 0: #
220
- raise gr.Error("PaDEL failed to produce an output file. Check molecule validity.") #
 
 
221
 
222
- progress(0.7, desc="πŸ“Š Processing results..."); yield "πŸ“Š Processing results...", None, gr.update(visible=False), None, current_state #
223
- df_X = pd.read_csv(output_csv).rename(columns={'Name': 'canonical_smiles'}) #
 
224
 
225
- final_df = pd.merge(input_df[['canonical_smiles', 'pIC50']], df_X, on='canonical_smiles', how='inner') #
226
 
227
- current_state['fingerprint_data'] = final_df; current_state['fingerprint_type'] = fingerprint_type #
228
- progress(0.9, desc="πŸ–ΌοΈ Generating molecule grid...") #
229
- mols_html = mols2grid.display(final_df, smiles_col='canonical_smiles', subset=['img', 'pIC50'], rename={"pIC50": "pIC50"}, transform={"pIC50": lambda x: f"{x:.2f}"})._repr_html_() #
230
- success_msg = f"βœ… Success! Generated {len(df_X.columns) -1} descriptors for {len(final_df)} molecules." #
231
- progress(1, desc="Completed!"); yield success_msg, final_df, gr.update(visible=True), gr.update(value=mols_html, visible=True), current_state #
232
- except Exception as e: raise gr.Error(f"Calculation failed: {e}") #
233
- finally: #
234
- if os.path.exists('molecule.smi'): os.remove('molecule.smi') #
235
- if os.path.exists('fingerprints.csv'): os.remove('fingerprints.csv') #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
  # ==============================================================================
238
  # === STEP 3: MODEL TRAINING & PREDICTION FUNCTIONS ===
 
200
  # === STEP 2: FEATURE ENGINEERING FUNCTIONS ===
201
  # ==============================================================================
202
 
203
def calculate_fingerprints(current_state, fingerprint_type, progress=gr.Progress()):
    """Compute PaDEL fingerprints for the cleaned dataset, streaming status updates.

    This is a generator; every item it yields is a 5-tuple of
    ``(status message, fingerprint DataFrame or None, gr.update for visibility,
    molecule-grid HTML update or None, current_state)``.

    Args:
        current_state: Dict-like session state. The cleaned DataFrame is read
            from ``current_state['cleaned_data']`` (its ``canonical_smiles``
            and ``pIC50`` columns are used). On success the keys
            ``'fingerprint_data'`` and ``'fingerprint_type'`` are written.
        fingerprint_type: Key looked up in the module-level ``fp_config``
            mapping to obtain the PaDEL descriptor-types file.
        progress: Gradio progress tracker, called with a fraction and a label.

    Yields:
        Intermediate status tuples, then a final tuple carrying the merged
        fingerprint table and the molecule-grid HTML.

    Raises:
        gr.Error: If there is no cleaned data, no fingerprint type is selected,
            the descriptor configuration is missing, PaDEL produces no output,
            no molecule can be matched to the PaDEL output, or any other
            failure occurs during the calculation.
    """
    input_df = current_state.get('cleaned_data')
    if input_df is None or input_df.empty:
        raise gr.Error("No cleaned data found. Please complete Step 1.")
    if not fingerprint_type:
        raise gr.Error("Please select a fingerprint type.")

    # NOTE(review): these are fixed names in the working directory, so two
    # calculations running at the same time would overwrite each other's
    # files -- confirm whether concurrent sessions need per-call temp paths.
    smi_file, output_csv = 'molecule.smi', 'fingerprints.csv'

    progress(0, desc="Starting...")
    yield "🧪 Starting fingerprint calculation...", None, gr.update(visible=False), None, current_state

    try:
        # The SMILES string is written twice per row: once as the structure and
        # once as the molecule name, so the PaDEL 'Name' column can be used as
        # the join key afterwards.
        input_df[['canonical_smiles', 'canonical_smiles']].to_csv(
            smi_file, sep='\t', index=False, header=False
        )

        # Remove any stale output so the existence check below is meaningful.
        if os.path.exists(output_csv):
            os.remove(output_csv)

        descriptortypes = fp_config.get(fingerprint_type)
        if not descriptortypes:
            raise gr.Error(f"Descriptor XML for '{fingerprint_type}' not found.")

        progress(0.3, desc="⚗️ Running PaDEL...")
        yield "⚗️ Running PaDEL...", None, gr.update(visible=False), None, current_state
        padeldescriptor(
            mol_dir=smi_file,
            d_file=output_csv,
            descriptortypes=descriptortypes,
            detectaromaticity=True,
            standardizenitro=True,
            standardizetautomers=True,
            threads=-1,
            removesalt=True,
            log=False,
            fingerprints=True,
        )

        if not os.path.exists(output_csv) or os.path.getsize(output_csv) == 0:
            raise gr.Error("PaDEL failed to produce an output file. Check molecule validity.")

        progress(0.7, desc="📊 Processing results...")
        yield "📊 Processing results...", None, gr.update(visible=False), None, current_state
        df_X = pd.read_csv(output_csv).rename(columns={'Name': 'canonical_smiles'})
        # A SMILES that occurs k times in the input yields k identically named
        # PaDEL rows; keep one of them so the merge returns one row per input
        # row rather than k * k.
        df_X = df_X.drop_duplicates(subset='canonical_smiles')

        final_df = pd.merge(
            input_df[['canonical_smiles', 'pIC50']], df_X, on='canonical_smiles', how='inner'
        )
        if final_df.empty:
            # Fail with a clear message before anything is stored in the state,
            # rather than with an IndexError from the diagnostics below.
            raise gr.Error("No molecules could be matched to the PaDEL output. Check molecule validity.")

        current_state['fingerprint_data'] = final_df
        current_state['fingerprint_type'] = fingerprint_type

        progress(0.9, desc="🖼️ Generating molecule grid...")

        # Diagnostics written to stdout to help debug the molecule grid.
        print(f"Final dataframe shape: {final_df.shape}")
        print(f"Columns: {final_df.columns.tolist()}")
        print(f"First few SMILES: {final_df['canonical_smiles'].head().tolist()}")

        # Sanity check that RDKit can parse the first SMILES.
        from rdkit import Chem
        test_mol = Chem.MolFromSmiles(final_df['canonical_smiles'].iloc[0])
        print(f"Test molecule created: {test_mol is not None}")

        # A grid failure must not discard the fingerprints that were computed,
        # so it is reported in place of the grid and does not abort the call.
        try:
            mols_html = mols2grid.display(
                final_df,
                smiles_col='canonical_smiles',
                subset=['img', 'pIC50'],
                rename={"pIC50": "pIC50"},
                transform={"pIC50": lambda x: f"{x:.2f}"},
            )._repr_html_()
            print("Mols2grid HTML created successfully")
        except Exception as grid_error:
            from html import escape
            print(f"Mols2grid error: {grid_error}")
            # Escape the message: it is arbitrary text embedded in HTML.
            mols_html = f"<p>Error creating molecule grid: {escape(str(grid_error))}</p>"

        # The 'canonical_smiles' column is not a descriptor, hence the -1.
        success_msg = f"✅ Success! Generated {len(df_X.columns) - 1} descriptors for {len(final_df)} molecules."
        progress(1, desc="Completed!")
        yield success_msg, final_df, gr.update(visible=True), gr.update(value=mols_html, visible=True), current_state

    except gr.Error:
        # Already a user-facing message; do not re-wrap it as "Calculation failed".
        raise
    except Exception as e:
        print(f"Full error: {e}")
        raise gr.Error(f"Calculation failed: {e}") from e

    finally:
        # Always remove the intermediate files, on success or failure.
        for leftover in (smi_file, output_csv):
            if os.path.exists(leftover):
                os.remove(leftover)
279
+
280
 
281
  # ==============================================================================
282
  # === STEP 3: MODEL TRAINING & PREDICTION FUNCTIONS ===