Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -200,39 +200,83 @@ def mannwhitney_test(df, descriptor): #
|
|
200 |
# === STEP 2: FEATURE ENGINEERING FUNCTIONS ===
|
201 |
# ==============================================================================
|
202 |
|
203 |
-
def calculate_fingerprints(current_state, fingerprint_type, progress=gr.Progress()):
|
204 |
-
input_df = current_state.get('cleaned_data')
|
205 |
-
if input_df is None or input_df.empty:
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
|
|
|
|
|
|
|
|
|
|
210 |
|
211 |
-
input_df[['canonical_smiles', 'canonical_smiles']].to_csv(smi_file, sep='\t', index=False, header=False)
|
212 |
|
213 |
-
if os.path.exists(output_csv):
|
214 |
-
|
215 |
-
|
|
|
|
|
216 |
|
217 |
-
progress(0.3, desc="βοΈ Running PaDEL...")
|
218 |
-
|
219 |
-
|
220 |
-
|
|
|
|
|
221 |
|
222 |
-
progress(0.7, desc="π Processing results...")
|
223 |
-
|
|
|
224 |
|
225 |
-
final_df = pd.merge(input_df[['canonical_smiles', 'pIC50']], df_X, on='canonical_smiles', how='inner')
|
226 |
|
227 |
-
current_state['fingerprint_data'] = final_df
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
|
237 |
# ==============================================================================
|
238 |
# === STEP 3: MODEL TRAINING & PREDICTION FUNCTIONS ===
|
|
|
200 |
# === STEP 2: FEATURE ENGINEERING FUNCTIONS ===
|
201 |
# ==============================================================================
|
202 |
|
203 |
+
def calculate_fingerprints(current_state, fingerprint_type, progress=gr.Progress()):
    """Compute PaDEL fingerprints for the cleaned dataset, streaming status updates.

    This is a generator: each ``yield`` produces a 5-tuple of
    ``(status_message, dataframe, visibility_update, grid_update, state)``.
    Intermediate yields carry ``None`` for the dataframe and grid slots and a
    ``gr.update(visible=False)``; the final yield carries the merged
    fingerprint dataframe, a ``gr.update(visible=True)`` and the mols2grid
    HTML wrapped in a ``gr.update``.

    Args:
        current_state: Mutable dict-like session state. ``'cleaned_data'`` is
            read from it; ``'fingerprint_data'`` and ``'fingerprint_type'``
            are written to it on success.
        fingerprint_type: Key looked up in ``fp_config`` to obtain the PaDEL
            descriptor-types file.
        progress: Gradio progress tracker used to report coarse progress.

    Yields:
        The 5-tuples described above.

    Raises:
        gr.Error: If there is no cleaned data, no fingerprint type was
            selected, the descriptor configuration is missing, PaDEL produced
            no output, no molecule survived the merge, or any unexpected
            error occurred during the calculation.
    """
    # NOTE(review): the status emoji were mis-encoded in the reviewed copy of
    # this file and have been restored. The glyph in front of
    # "Processing results..." could not be recovered unambiguously; the chart
    # emoji used below is a best guess -- confirm against the repository.
    input_df = current_state.get('cleaned_data')
    if input_df is None or input_df.empty:
        raise gr.Error("No cleaned data found. Please complete Step 1.")
    if not fingerprint_type:
        raise gr.Error("Please select a fingerprint type.")

    progress(0, desc="Starting...")
    yield "🧪 Starting fingerprint calculation...", None, gr.update(visible=False), None, current_state

    # NOTE(review): fixed file names in the working directory are shared by
    # every concurrent call of this function -- confirm the app never runs
    # two calculations at once, or switch to per-call temporary files.
    smi_file, output_csv = 'molecule.smi', 'fingerprints.csv'

    try:
        # The SMILES column is written twice on purpose: the second copy is
        # presumably read by PaDEL as the molecule name, which comes back as
        # the 'Name' column and is used as the merge key below.
        input_df[['canonical_smiles', 'canonical_smiles']].to_csv(
            smi_file, sep='\t', index=False, header=False
        )

        # Remove any stale output so the existence check after the PaDEL run
        # cannot be satisfied by a previous calculation.
        if os.path.exists(output_csv):
            os.remove(output_csv)

        descriptortypes = fp_config.get(fingerprint_type)
        if not descriptortypes:
            raise gr.Error(f"Descriptor XML for '{fingerprint_type}' not found.")

        progress(0.3, desc="⚙️ Running PaDEL...")
        yield "⚙️ Running PaDEL...", None, gr.update(visible=False), None, current_state
        padeldescriptor(
            mol_dir=smi_file,
            d_file=output_csv,
            descriptortypes=descriptortypes,
            detectaromaticity=True,
            standardizenitro=True,
            standardizetautomers=True,
            threads=-1,
            removesalt=True,
            log=False,
            fingerprints=True,
        )

        if not os.path.exists(output_csv) or os.path.getsize(output_csv) == 0:
            raise gr.Error("PaDEL failed to produce an output file. Check molecule validity.")

        progress(0.7, desc="📊 Processing results...")
        yield "📊 Processing results...", None, gr.update(visible=False), None, current_state
        df_X = pd.read_csv(output_csv).rename(columns={'Name': 'canonical_smiles'})

        final_df = pd.merge(
            input_df[['canonical_smiles', 'pIC50']], df_X, on='canonical_smiles', how='inner'
        )
        # An empty merge used to surface as an opaque IndexError from the
        # `.iloc[0]` diagnostic below; report it explicitly instead.
        if final_df.empty:
            raise gr.Error("No molecules matched between the input data and the PaDEL output.")

        current_state['fingerprint_data'] = final_df
        current_state['fingerprint_type'] = fingerprint_type

        progress(0.9, desc="🖼️ Generating molecule grid...")

        # Diagnostics written to stdout to help debug grid rendering.
        print(f"Final dataframe shape: {final_df.shape}")
        print(f"Columns: {final_df.columns.tolist()}")
        print(f"First few SMILES: {final_df['canonical_smiles'].head().tolist()}")

        # Check that RDKit can parse the first SMILES string.
        from rdkit import Chem
        test_mol = Chem.MolFromSmiles(final_df['canonical_smiles'].iloc[0])
        print(f"Test molecule created: {test_mol is not None}")

        # Grid rendering is best-effort: a failure here is shown in place of
        # the grid rather than discarding the computed fingerprints.
        try:
            mols_html = mols2grid.display(
                final_df,
                smiles_col='canonical_smiles',
                subset=['img', 'pIC50'],
                rename={"pIC50": "pIC50"},
                transform={"pIC50": lambda x: f"{x:.2f}"},
            )._repr_html_()
            print("Mols2grid HTML created successfully")
        except Exception as grid_error:
            print(f"Mols2grid error: {grid_error}")
            mols_html = f"<p>Error creating molecule grid: {str(grid_error)}</p>"

        # df_X has one non-descriptor column (the SMILES key), hence the -1.
        success_msg = (
            f"✅ Success! Generated {len(df_X.columns) - 1} descriptors "
            f"for {len(final_df)} molecules."
        )
        progress(1, desc="Completed!")
        yield success_msg, final_df, gr.update(visible=True), gr.update(value=mols_html, visible=True), current_state

    except gr.Error:
        # Already a user-facing message; do not wrap it a second time.
        raise
    except Exception as e:
        print(f"Full error: {e}")
        raise gr.Error(f"Calculation failed: {e}") from e
    finally:
        # Always remove the intermediate files, on success or failure.
        for leftover in (smi_file, output_csv):
            if os.path.exists(leftover):
                os.remove(leftover)
|
279 |
+
|
280 |
|
281 |
# ==============================================================================
|
282 |
# === STEP 3: MODEL TRAINING & PREDICTION FUNCTIONS ===
|