Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,6 @@ from sklearn.model_selection import train_test_split
|
|
20 |
|
21 |
# 3D Visualization
|
22 |
import py3Dmol
|
23 |
-
from stmol import showmol # Import the new component
|
24 |
|
25 |
# Bokeh plotting
|
26 |
from bokeh.plotting import figure
|
@@ -150,28 +149,29 @@ def fetch_fasta_sequence(protein_id: str):
|
|
150 |
log += f"β An error occurred while fetching FASTA data: {e}\n"
|
151 |
return log
|
152 |
|
153 |
-
|
154 |
-
def visualize_protein_3d(pdb_data: str):
|
155 |
"""
|
156 |
Generates an interactive 3D protein visualization using py3Dmol.
|
157 |
"""
|
158 |
if not pdb_data:
|
159 |
return None, "Cannot generate 3D view: No PDB data provided."
|
160 |
try:
|
161 |
-
viewer = py3Dmol.view(width=
|
162 |
viewer.setBackgroundColor('#1C1C1C')
|
163 |
viewer.addModel(pdb_data, "pdb")
|
164 |
viewer.setStyle({'cartoon': {'color': 'spectrum', 'thickness': 0.8}})
|
165 |
viewer.addSurface(py3Dmol.VDW, {'opacity': 0.3, 'color': 'lightblue'})
|
166 |
viewer.zoomTo()
|
167 |
-
|
168 |
-
|
|
|
169 |
except Exception as e:
|
170 |
return None, f"β 3D visualization error: {e}"
|
171 |
|
172 |
def create_sample_molecules():
|
173 |
"""
|
174 |
Returns a dictionary of sample molecules in Name:SMILES format.
|
|
|
175 |
"""
|
176 |
return {
|
177 |
"Oseltamivir (Influenza)": "CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C",
|
@@ -181,9 +181,9 @@ def create_sample_molecules():
|
|
181 |
"Atorvastatin (Cholesterol)": "CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1", # Lipitor
|
182 |
"Metformin (Diabetes)": "CN(C)C(=N)N=C(N)N",
|
183 |
"Loratadine (Antihistamine)": "CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1",
|
184 |
-
"Imatinib (Gleevec - Cancer)": "Cc1ccc(NC(=O)c2cnc(C)s2)cc1-c1cnc(Nc2ccc(CN)cc2)nc1",
|
185 |
-
"Amlodipine (Hypertension)": "CCC(COC(=O)c1cnc(C)c(c1C)C(=O)OC)c1ccc(Cl)cc1",
|
186 |
-
"Rosuvastatin (Cholesterol)": "CC(C)c1ccc(cc1)S(=O)(=O)Nc1ncc(C)c(C(=O)O[C@H](C)[C@H](O)CC(=O)O)c1C",
|
187 |
}
|
188 |
|
189 |
def calculate_molecular_properties(smiles_dict: dict):
|
@@ -196,9 +196,13 @@ def calculate_molecular_properties(smiles_dict: dict):
|
|
196 |
mol = Chem.MolFromSmiles(smiles)
|
197 |
if mol:
|
198 |
props = {
|
199 |
-
'Molecule': name,
|
200 |
-
'
|
201 |
-
'
|
|
|
|
|
|
|
|
|
202 |
'RotBonds': Descriptors.NumRotatableBonds(mol),
|
203 |
}
|
204 |
properties.append(props)
|
@@ -212,6 +216,7 @@ def calculate_molecular_properties(smiles_dict: dict):
|
|
212 |
def assess_drug_likeness(df: pd.DataFrame):
|
213 |
"""
|
214 |
Assesses drug-likeness based on Lipinski's Rule of Five.
|
|
|
215 |
"""
|
216 |
if df.empty:
|
217 |
return pd.DataFrame(), pd.DataFrame(), "Cannot assess drug-likeness: No properties data."
|
@@ -222,6 +227,7 @@ def assess_drug_likeness(df: pd.DataFrame):
|
|
222 |
analysis_df['HBD_OK'] = analysis_df['HBD'] <= 5
|
223 |
analysis_df['HBA_OK'] = analysis_df['HBA'] <= 10
|
224 |
analysis_df['Lipinski_Violations'] = (~analysis_df[['MW_OK', 'LogP_OK', 'HBD_OK', 'HBA_OK']]).sum(axis=1)
|
|
|
225 |
analysis_df['Drug_Like'] = analysis_df['Lipinski_Violations'] <= 1
|
226 |
|
227 |
display_df = df.copy()
|
@@ -229,10 +235,13 @@ def assess_drug_likeness(df: pd.DataFrame):
|
|
229 |
display_df['Drug_Like'] = analysis_df['Drug_Like'].apply(lambda x: 'β
Yes' if x else 'β No')
|
230 |
|
231 |
log = "β
Assessed drug-likeness using Lipinski's Rule of Five.\n"
|
|
|
232 |
return analysis_df, display_df, log
|
233 |
|
|
|
234 |
def plot_properties_dashboard(df: pd.DataFrame):
|
235 |
-
"""Creates a 2x2 dashboard of molecular property visualizations using Bokeh."""
|
|
|
236 |
if df.empty or 'Drug_Like' not in df.columns:
|
237 |
return None, "Cannot plot: No analysis data or 'Drug_Like' column missing."
|
238 |
|
@@ -251,56 +260,89 @@ def plot_properties_dashboard(df: pd.DataFrame):
|
|
251 |
])
|
252 |
|
253 |
plot_config = {
|
254 |
-
'sizing_mode': 'scale_width', 'aspect_ratio': 1,
|
255 |
-
'
|
|
|
|
|
256 |
}
|
257 |
|
258 |
def style_plot(p, x_label, y_label, title):
|
259 |
-
|
260 |
-
p.
|
261 |
-
p.
|
262 |
-
p.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
if p.legend:
|
264 |
-
p.legend.location
|
265 |
-
p.legend.
|
|
|
|
|
|
|
|
|
266 |
return p
|
267 |
|
268 |
-
p1 = figure(tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
|
269 |
-
p1.scatter('MW', 'LogP', source=source, legend_group='Category',
|
270 |
-
|
271 |
-
p1.line([df['
|
|
|
272 |
style_plot(p1, "Molecular Weight (Da)", "LogP", "Lipinski Rule: MW vs LogP")
|
273 |
|
274 |
-
p2 = figure(tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
|
275 |
-
p2.scatter('HBD', 'HBA', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8)
|
276 |
-
p2.line([5, 5], [df['HBA'].min()-1, df['HBA'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2)
|
277 |
-
p2.line([df['HBD'].min()-1, df['HBD'].max()+1], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2)
|
278 |
style_plot(p2, "Hydrogen Bond Donors", "Hydrogen Bond Acceptors", "Lipinski Rule: Hydrogen Bonding")
|
279 |
|
280 |
-
p3 = figure(tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
|
281 |
-
p3.scatter('TPSA', 'RotBonds', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8)
|
282 |
-
p3.line([140, 140], [df['RotBonds'].min()-1, df['RotBonds'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2)
|
283 |
-
p3.line([df['TPSA'].min()-10, df['TPSA'].max()+10], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2)
|
284 |
style_plot(p3, "Topological Polar Surface Area (Γ
Β²)", "Rotatable Bonds", "Drug Permeability Indicators")
|
285 |
|
286 |
p4_config = plot_config.copy()
|
287 |
-
p4_config['tools']
|
|
|
288 |
p4 = figure(title="Drug-Likeness Distribution", **p4_config)
|
289 |
|
|
|
290 |
counts = df['Category'].value_counts()
|
291 |
-
data = pd.DataFrame(counts
|
292 |
-
data.columns = ['category', 'value']
|
293 |
data['angle'] = data['value']/data['value'].sum() * 2*pi
|
294 |
-
data['color'] = [colors[0] if cat == 'Drug-Like' else colors[1] for cat in
|
295 |
data['percentage'] = (data['value'] / data['value'].sum() * 100).round(1)
|
296 |
-
|
|
|
|
|
|
|
|
|
297 |
|
298 |
wedge_renderer = p4.annular_wedge(x=0, y=0, inner_radius=0.25, outer_radius=0.45,
|
299 |
start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
|
300 |
-
line_color="white",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
301 |
|
302 |
-
|
303 |
-
p4.text(0, 0, text=[f"{
|
304 |
text_align="center", text_baseline="middle", text_color="white", text_font_size="10pt", text_font_style="bold")
|
305 |
|
306 |
style_plot(p4, "", "", "Compound Classification")
|
@@ -312,18 +354,32 @@ def plot_properties_dashboard(df: pd.DataFrame):
|
|
312 |
|
313 |
# ===== Phase 2 Functions =====
|
314 |
def get_phase2_molecules():
|
|
|
|
|
|
|
|
|
315 |
return {
|
316 |
-
'Paracetamol (Analgesic)': 'CC(=O)Nc1ccc(O)cc1',
|
317 |
-
'
|
318 |
-
'
|
319 |
-
'
|
320 |
-
'
|
321 |
-
'
|
322 |
-
'
|
323 |
-
'
|
324 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
325 |
}
|
326 |
|
|
|
327 |
def simulate_virtual_screening(smiles_dict: dict):
|
328 |
np.random.seed(42)
|
329 |
scores = np.random.uniform(2.0, 9.8, len(smiles_dict))
|
@@ -348,25 +404,83 @@ def predict_admet_properties(smiles_dict: dict):
|
|
348 |
log += f"β
Predicted ADMET properties for {len(df)} molecules.\n"
|
349 |
return df, log
|
350 |
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
"""Generates a 2D image and a 3D py3Dmol viewer object for a single molecule."""
|
355 |
try:
|
356 |
mol = Chem.MolFromSmiles(smiles)
|
357 |
-
if not mol: return
|
358 |
-
|
359 |
-
# 2D SVG Image
|
360 |
drawer = Draw.rdMolDraw2D.MolDraw2DSVG(400, 300)
|
|
|
361 |
drawer.drawOptions().clearBackground = False
|
362 |
-
drawer.drawOptions().
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
363 |
drawer.DrawMolecule(mol)
|
364 |
drawer.FinishDrawing()
|
365 |
svg_2d = drawer.GetDrawingText().replace('svg:', '')
|
366 |
-
# Simple color replacement for dark theme
|
367 |
-
svg_2d = svg_2d.replace('black', 'white')
|
368 |
|
369 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
370 |
mol_3d = Chem.AddHs(mol)
|
371 |
AllChem.EmbedMolecule(mol_3d, randomSeed=42)
|
372 |
AllChem.MMFFOptimizeMolecule(mol_3d)
|
@@ -377,48 +491,75 @@ def generate_molecule_visuals(smiles: str):
|
|
377 |
viewer.addModel(sdf_data, "sdf")
|
378 |
viewer.setStyle({'stick': {}, 'sphere': {'scale': 0.25}})
|
379 |
viewer.zoomTo()
|
380 |
-
|
381 |
-
|
382 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
383 |
except Exception as e:
|
384 |
-
return
|
385 |
|
386 |
-
# REFACTORED: This function now returns a py3Dmol viewer object, not HTML
|
387 |
def visualize_protein_ligand_interaction(pdb_data: str, pdb_id: str, ligand_resn: str):
|
388 |
"""
|
389 |
Generates a protein-ligand interaction visualization using py3Dmol.
|
390 |
"""
|
391 |
if not pdb_data:
|
392 |
return None, "Cannot generate interaction view: No PDB data provided."
|
|
|
393 |
try:
|
394 |
-
viewer = py3Dmol.view(width=
|
395 |
viewer.setBackgroundColor('#1C1C1C')
|
|
|
|
|
396 |
viewer.addModel(pdb_data, "pdb")
|
|
|
|
|
397 |
viewer.setStyle({'cartoon': {'color': 'lightblue', 'opacity': 0.8}})
|
|
|
|
|
398 |
if ligand_resn:
|
399 |
viewer.addStyle({'resn': ligand_resn}, {'stick': {'colorscheme': 'greenCarbon', 'radius': 0.2}})
|
400 |
viewer.addStyle({'resn': ligand_resn}, {'sphere': {'scale': 0.3, 'colorscheme': 'greenCarbon'}})
|
401 |
-
viewer.addSurface(py3Dmol.VDW, {'opacity': 0.2, 'color': 'white'}, {'resn': ligand_resn})
|
402 |
-
viewer.zoomTo({'resn': ligand_resn})
|
403 |
-
else:
|
404 |
-
viewer.zoomTo()
|
405 |
|
|
|
|
|
|
|
|
|
|
|
|
|
406 |
log = f"β
Generated protein-ligand interaction view for {pdb_id} with ligand {ligand_resn}."
|
407 |
-
return
|
|
|
408 |
except Exception as e:
|
409 |
return None, f"β Interaction visualization error: {e}"
|
410 |
|
411 |
# ===== Phase 3 Functions =====
|
412 |
def get_phase3_molecules():
|
|
|
|
|
|
|
|
|
413 |
return {
|
414 |
'Oseltamivir (Influenza)': 'CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C',
|
415 |
'Aspirin (Pain/Antiplatelet)': 'CC(=O)OC1=CC=CC=C1C(=O)O',
|
416 |
'Remdesivir (Antiviral)': 'CCC(CC)COC(=O)[C@@H](C)N[P@](=O)(OC[C@@H]1O[C@](C#N)([C@H]([C@@H]1O)O)C2=CC=C3N2N=CN=C3N)OC4=CC=CC=C4',
|
417 |
'Penicillin G (Antibiotic)': 'CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C',
|
418 |
"Imatinib (Gleevec - Cancer)": "Cc1ccc(NC(=O)c2cnc(C)s2)cc1-c1cnc(Nc2ccc(CN)cc2)nc1",
|
419 |
-
"Sorafenib (Kinase Inhibitor)": "Clc1cccc(Cl)c1OC(=O)Nc1ccc(nc1)NC(=O)C(C)(C)C",
|
|
|
420 |
"Venetoclax (BCL-2 Inhibitor)": "CC1(CCC(=C(C1)C2=CC=C(C=C2)Cl)CN3CCN(CC3)C4=CC(=C(C=C4)C(=O)NS(=O)(=O)C5=CC(=C(C=C5)NCC6CCOCC6)[N+](=O)[O-])OC7=CN=C8C(=C7)C=CN8)C",
|
421 |
-
"Dasatinib (Kinase Inhibitor)": "CC1=NC(=NC=C1SC2=NC=C(C=N2)C(=O)NC3=CC=CC(=C3)N)C(=O)O",
|
422 |
}
|
423 |
|
424 |
def calculate_comprehensive_properties(smiles_dict: dict):
|
@@ -463,48 +604,90 @@ def predict_toxicity(properties_df: pd.DataFrame):
|
|
463 |
# ===== Phase 4 Functions =====
|
464 |
def get_regulatory_summary():
|
465 |
summary = {'Component': ['Data Governance', 'Model Architecture', 'Model Validation', 'Interpretability'],
|
466 |
-
'Description': ['Data sourced from ChEMBL, PDB, GISAID.',
|
467 |
-
'
|
468 |
'ADMET Model validated with AUC-ROC > 0.85 on an independent test set.',
|
469 |
'SHAP used for patient stratification model outputs.']}
|
470 |
return pd.DataFrame(summary), "β
Generated AI/ML documentation summary."
|
471 |
|
472 |
def simulate_rwd_analysis(adverse_event_text):
|
|
|
|
|
|
|
473 |
np.random.seed(42)
|
474 |
base_events = list(np.random.choice(
|
475 |
-
['headache', 'nausea', 'fatigue', 'dizziness', 'rash', 'fever', 'diarrhea'
|
476 |
-
|
|
|
477 |
))
|
|
|
478 |
user_terms = [word.lower() for word in re.findall(r'\b[a-zA-Z]{3,}\b', adverse_event_text)]
|
|
|
479 |
all_events = base_events + user_terms
|
480 |
-
|
|
|
|
|
|
|
481 |
results_df = event_counts.reset_index()
|
482 |
results_df.columns = ['Adverse_Event', 'Frequency']
|
483 |
-
|
|
|
484 |
|
|
|
485 |
source = ColumnDataSource(results_df)
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
490 |
p.background_fill_color = "#1C1C1C"
|
491 |
p.border_fill_color = "#1C1C1C"
|
|
|
492 |
p.title.text_color = "white"
|
493 |
-
p.
|
494 |
-
p.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
495 |
return results_df, p, log
|
496 |
|
497 |
def get_ethical_framework():
|
498 |
framework = {'Principle': ['Beneficence', 'Non-maleficence', 'Fairness', 'Transparency'],
|
499 |
'Implementation Strategy': [
|
500 |
'AI models prioritize patient outcomes and clinical efficacy.',
|
501 |
-
'Toxicity prediction models aim to minimize patient harm.',
|
502 |
-
'Algorithms are audited for demographic bias.',
|
503 |
'Model cards and SHAP values are provided for key decision-making processes.'
|
504 |
]}
|
505 |
return pd.DataFrame(framework), "β
Generated Ethical AI Framework summary."
|
506 |
|
507 |
# --- 3. Streamlit UI Layout ---
|
|
|
|
|
|
|
508 |
if 'log_p1' not in st.session_state: st.session_state.log_p1 = "Status logs will appear here."
|
509 |
if 'log_p2' not in st.session_state: st.session_state.log_p2 = "Status logs will appear here."
|
510 |
if 'log_p3' not in st.session_state: st.session_state.log_p3 = "Status logs will appear here."
|
@@ -514,179 +697,307 @@ if 'results_p2' not in st.session_state: st.session_state.results_p2 = {}
|
|
514 |
if 'results_p3' not in st.session_state: st.session_state.results_p3 = {}
|
515 |
if 'results_p4' not in st.session_state: st.session_state.results_p4 = {}
|
516 |
|
|
|
517 |
st.title("π¬ AI-Powered Drug Discovery Pipeline")
|
518 |
st.markdown("An integrated application demonstrating a four-phase computational drug discovery workflow.")
|
519 |
|
|
|
520 |
tab1, tab2, tab3, tab4 = st.tabs([
|
521 |
-
"**Phase 1:** Target Identification",
|
522 |
-
"**Phase
|
|
|
|
|
523 |
])
|
524 |
|
525 |
# --- Phase 1: Target Identification ---
|
526 |
with tab1:
|
527 |
st.header("Phase 1: Target Identification & Initial Analysis")
|
528 |
-
|
529 |
-
|
|
|
|
|
|
|
|
|
530 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
531 |
selected_pdb_name = st.selectbox("Select PDB ID:", options=list(pdb_options.keys()), index=0)
|
532 |
pdb_id_input = pdb_options[selected_pdb_name]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
533 |
selected_protein_name = st.selectbox("Select NCBI Protein ID:", options=list(protein_options.keys()), index=0)
|
534 |
protein_id_input = protein_options[selected_protein_name]
|
535 |
|
536 |
st.markdown("---")
|
|
|
537 |
sample_molecules = create_sample_molecules()
|
538 |
selected_molecules = st.multiselect(
|
539 |
-
"Select from known drugs:",
|
540 |
-
|
|
|
541 |
)
|
542 |
|
543 |
if st.button("π Run Phase 1 Analysis", key="run_p1"):
|
544 |
-
with st.spinner("
|
545 |
full_log = "--- Phase 1 Analysis Started ---\n"
|
|
|
546 |
pdb_data, log_pdb = fetch_pdb_structure(pdb_id_input)
|
547 |
full_log += log_pdb
|
548 |
-
|
|
|
|
|
549 |
smiles_to_analyze = {name: sample_molecules[name] for name in selected_molecules}
|
550 |
properties_df, log_props = calculate_molecular_properties(smiles_to_analyze)
|
551 |
full_log += log_props
|
|
|
552 |
analysis_df, display_df, log_likeness = assess_drug_likeness(properties_df)
|
553 |
full_log += log_likeness
|
554 |
-
|
|
|
555 |
full_log += log_3d
|
|
|
556 |
dashboard_plot, log_dash = plot_properties_dashboard(analysis_df)
|
557 |
full_log += log_dash
|
558 |
-
|
559 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
560 |
|
561 |
-
st.text_area("Status & Logs", st.session_state.log_p1, height=200)
|
562 |
|
563 |
-
|
|
|
|
|
|
|
564 |
res1 = st.session_state.results_p1
|
565 |
p1_tabs = st.tabs(["Protein Structure", "Compound Properties Dashboard"])
|
|
|
566 |
with p1_tabs[0]:
|
567 |
st.subheader(f"3D Structure for PDB ID: {pdb_id_input}")
|
568 |
-
if res1.get('
|
569 |
-
|
|
|
|
|
|
|
570 |
with p1_tabs[1]:
|
571 |
st.subheader("Physicochemical Properties Analysis")
|
|
|
572 |
st.dataframe(res1.get('properties_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
573 |
if res1.get('dashboard'):
|
574 |
st.bokeh_chart(res1['dashboard'], use_container_width=True)
|
575 |
|
|
|
576 |
# --- Phase 2: Hit Discovery & ADMET ---
|
577 |
with tab2:
|
578 |
st.header("Phase 2: Virtual Screening & Early ADMET")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
579 |
p2_molecules = get_phase2_molecules()
|
580 |
st.info(f"A library of {len(p2_molecules)} compounds is ready for screening.")
|
581 |
|
|
|
582 |
interaction_pdb_options = {
|
583 |
-
"Neuraminidase + Oseltamivir (2HU4)": {"pdb": "2HU4", "ligand": "G39"},
|
584 |
-
"
|
|
|
|
|
585 |
}
|
586 |
-
selected_interaction_pdb_name = st.selectbox(
|
|
|
|
|
|
|
|
|
587 |
p2_pdb_id = interaction_pdb_options[selected_interaction_pdb_name]["pdb"]
|
588 |
p2_ligand_resn = interaction_pdb_options[selected_interaction_pdb_name]["ligand"]
|
589 |
|
|
|
|
|
|
|
590 |
if st.button("π Run Phase 2 Analysis", key="run_p2"):
|
591 |
-
with st.spinner("Running
|
592 |
full_log = "--- Phase 2 Analysis Started ---\n"
|
|
|
593 |
screening_df, log_screen = simulate_virtual_screening(p2_molecules)
|
594 |
full_log += log_screen
|
595 |
admet_df, log_admet = predict_admet_properties(p2_molecules)
|
596 |
full_log += log_admet
|
|
|
597 |
merged_df = pd.merge(screening_df, admet_df, on="Molecule")
|
|
|
598 |
pdb_data, log_pdb_p2 = fetch_pdb_structure(p2_pdb_id)
|
599 |
full_log += log_pdb_p2
|
600 |
-
|
|
|
601 |
full_log += log_interact
|
602 |
-
st.session_state.log_p2 = full_log + "\n--- Phase 2 Analysis Complete ---"
|
603 |
-
st.session_state.results_p2 = {'merged_df': merged_df, 'interaction_viewer': interaction_viewer}
|
604 |
|
605 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
606 |
|
607 |
-
|
|
|
|
|
|
|
608 |
res2 = st.session_state.results_p2
|
609 |
p2_tabs = st.tabs(["Screening & ADMET Results", "Protein-Ligand Interaction"])
|
|
|
610 |
with p2_tabs[0]:
|
|
|
611 |
st.dataframe(res2.get('merged_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
|
|
612 |
with p2_tabs[1]:
|
613 |
-
st.subheader(f"Interaction for PDB {p2_pdb_id} with Ligand {p2_ligand_resn}")
|
614 |
-
if res2.get('
|
615 |
-
|
616 |
-
|
|
|
|
|
617 |
# --- Phase 3: Lead Optimization ---
|
618 |
with tab3:
|
619 |
st.header("Phase 3: Lead Compound Optimization")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
620 |
p3_molecules = get_phase3_molecules()
|
621 |
selected_leads = st.multiselect(
|
622 |
-
"Select lead compounds to optimize:",
|
623 |
-
|
|
|
624 |
)
|
625 |
|
626 |
if st.button("π Run Phase 3 Analysis", key="run_p3"):
|
627 |
-
with st.spinner("
|
628 |
full_log = "--- Phase 3 Analysis Started ---\n"
|
|
|
629 |
smiles_to_analyze_p3 = {name: p3_molecules[name] for name in selected_leads}
|
|
|
630 |
comp_props_df, log_comp = calculate_comprehensive_properties(smiles_to_analyze_p3)
|
631 |
full_log += log_comp
|
|
|
632 |
toxicity_df, log_tox = predict_toxicity(comp_props_df)
|
633 |
full_log += log_tox
|
|
|
634 |
final_df = pd.merge(comp_props_df, toxicity_df, on="Compound")
|
635 |
-
|
636 |
-
|
637 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
638 |
|
639 |
-
st.text_area("Status & Logs", st.session_state.log_p3, height=200)
|
640 |
|
641 |
-
|
|
|
|
|
|
|
|
|
642 |
res3 = st.session_state.results_p3
|
643 |
st.subheader("Lead Compound Analysis & Toxicity Prediction")
|
644 |
st.dataframe(res3.get('final_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
645 |
|
646 |
st.subheader("2D & 3D Molecular Structures")
|
647 |
-
for name,
|
648 |
-
st.
|
649 |
-
|
650 |
-
with col1:
|
651 |
-
st.markdown("##### 2D Structure")
|
652 |
-
if svg_2d:
|
653 |
-
st.image(svg_2d)
|
654 |
-
with col2:
|
655 |
-
st.markdown("##### 3D Structure")
|
656 |
-
if viewer_3d:
|
657 |
-
showmol(viewer_3d, height=300, width=400)
|
658 |
|
659 |
# --- Phase 4: Pre-clinical & RWE ---
|
660 |
with tab4:
|
661 |
st.header("Phase 4: Simulated Pre-clinical & Real-World Evidence (RWE)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
662 |
rwd_input = st.text_area(
|
663 |
"Enter simulated adverse event report text:",
|
664 |
-
"Patient reports include instances of headache, severe nausea, and occasional skin rash.",
|
|
|
665 |
)
|
666 |
|
667 |
if st.button("π Run Phase 4 Analysis", key="run_p4"):
|
668 |
-
with st.spinner("
|
669 |
full_log = "--- Phase 4 Analysis Started ---\n"
|
|
|
670 |
reg_df, log_reg = get_regulatory_summary()
|
671 |
full_log += log_reg
|
|
|
672 |
eth_df, log_eth = get_ethical_framework()
|
673 |
full_log += log_eth
|
|
|
674 |
rwd_df, plot_bar, log_rwd = simulate_rwd_analysis(rwd_input)
|
675 |
full_log += log_rwd
|
676 |
-
|
677 |
-
st.session_state.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
678 |
|
679 |
-
st.text_area("Status & Logs", st.session_state.log_p4, height=200)
|
680 |
|
681 |
-
|
|
|
|
|
|
|
682 |
res4 = st.session_state.results_p4
|
683 |
p4_tabs = st.tabs(["Pharmacovigilance Analysis", "Regulatory & Ethical Frameworks"])
|
|
|
684 |
with p4_tabs[0]:
|
|
|
685 |
if res4.get('plot_bar'):
|
686 |
st.bokeh_chart(res4['plot_bar'], use_container_width=True)
|
687 |
st.dataframe(res4.get('rwd_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
|
|
688 |
with p4_tabs[1]:
|
689 |
st.subheader("AI/ML Model Regulatory Summary")
|
690 |
st.dataframe(res4.get('reg_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
|
|
691 |
st.subheader("Ethical AI Framework")
|
692 |
-
st.dataframe(res4.get('eth_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
|
|
|
20 |
|
21 |
# 3D Visualization
|
22 |
import py3Dmol
|
|
|
23 |
|
24 |
# Bokeh plotting
|
25 |
from bokeh.plotting import figure
|
|
|
149 |
log += f"β An error occurred while fetching FASTA data: {e}\n"
|
150 |
return log
|
151 |
|
152 |
+
def visualize_protein_3d(pdb_data: str, title="Protein 3D Structure"):
    """
    Generates an interactive 3D protein visualization using py3Dmol.

    Args:
        pdb_data: Structure text handed to the viewer as a "pdb" model.
        title: Label used only in the returned log message.

    Returns:
        A ``(html, log)`` tuple. ``html`` is the markup produced by the
        py3Dmol viewer, or ``None`` when no data was supplied or rendering
        raised; ``log`` is a one-line status message.
    """
    if not pdb_data:
        return None, "Cannot generate 3D view: No PDB data provided."
    try:
        viewer = py3Dmol.view(width='100%', height=600)
        viewer.setBackgroundColor('#1C1C1C')
        viewer.addModel(pdb_data, "pdb")
        viewer.setStyle({'cartoon': {'color': 'spectrum', 'thickness': 0.8}})
        viewer.addSurface(py3Dmol.VDW, {'opacity': 0.3, 'color': 'lightblue'})
        viewer.zoomTo()
        # NOTE(review): _make_html() is an underscore-prefixed (private)
        # py3Dmol method; confirm it is still available when upgrading.
        html = viewer._make_html()
        log = f"✅ Generated 3D visualization for {title}."
        return html, log
    except Exception as e:
        # Best-effort UI helper: report the failure in the log instead of
        # propagating it to the caller.
        return None, f"❌ 3D visualization error: {e}"
|
170 |
|
171 |
def create_sample_molecules():
|
172 |
"""
|
173 |
Returns a dictionary of sample molecules in Name:SMILES format.
|
174 |
+
Expanded list for more comprehensive demonstration.
|
175 |
"""
|
176 |
return {
|
177 |
"Oseltamivir (Influenza)": "CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C",
|
|
|
181 |
"Atorvastatin (Cholesterol)": "CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1", # Lipitor
|
182 |
"Metformin (Diabetes)": "CN(C)C(=N)N=C(N)N",
|
183 |
"Loratadine (Antihistamine)": "CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1",
|
184 |
+
"Imatinib (Gleevec - Cancer)": "Cc1ccc(NC(=O)c2cnc(C)s2)cc1-c1cnc(Nc2ccc(CN)cc2)nc1", # Complex structure, tyrosine kinase inhibitor
|
185 |
+
"Amlodipine (Hypertension)": "CCC(COC(=O)c1cnc(C)c(c1C)C(=O)OC)c1ccc(Cl)cc1", # Calcium channel blocker
|
186 |
+
"Rosuvastatin (Cholesterol)": "CC(C)c1ccc(cc1)S(=O)(=O)Nc1ncc(C)c(C(=O)O[C@H](C)[C@H](O)CC(=O)O)c1C", # Statin
|
187 |
}
|
188 |
|
189 |
def calculate_molecular_properties(smiles_dict: dict):
|
|
|
196 |
mol = Chem.MolFromSmiles(smiles)
|
197 |
if mol:
|
198 |
props = {
|
199 |
+
'Molecule': name,
|
200 |
+
'SMILES': smiles,
|
201 |
+
'MW': Descriptors.MolWt(mol),
|
202 |
+
'LogP': Descriptors.MolLogP(mol),
|
203 |
+
'HBD': Descriptors.NumHDonors(mol),
|
204 |
+
'HBA': Descriptors.NumHAcceptors(mol),
|
205 |
+
'TPSA': Descriptors.TPSA(mol),
|
206 |
'RotBonds': Descriptors.NumRotatableBonds(mol),
|
207 |
}
|
208 |
properties.append(props)
|
|
|
216 |
def assess_drug_likeness(df: pd.DataFrame):
|
217 |
"""
|
218 |
Assesses drug-likeness based on Lipinski's Rule of Five.
|
219 |
+
This version returns a boolean for plotting and a formatted string for display.
|
220 |
"""
|
221 |
if df.empty:
|
222 |
return pd.DataFrame(), pd.DataFrame(), "Cannot assess drug-likeness: No properties data."
|
|
|
227 |
analysis_df['HBD_OK'] = analysis_df['HBD'] <= 5
|
228 |
analysis_df['HBA_OK'] = analysis_df['HBA'] <= 10
|
229 |
analysis_df['Lipinski_Violations'] = (~analysis_df[['MW_OK', 'LogP_OK', 'HBD_OK', 'HBA_OK']]).sum(axis=1)
|
230 |
+
|
231 |
analysis_df['Drug_Like'] = analysis_df['Lipinski_Violations'] <= 1
|
232 |
|
233 |
display_df = df.copy()
|
|
|
235 |
display_df['Drug_Like'] = analysis_df['Drug_Like'].apply(lambda x: 'β
Yes' if x else 'β No')
|
236 |
|
237 |
log = "β
Assessed drug-likeness using Lipinski's Rule of Five.\n"
|
238 |
+
|
239 |
return analysis_df, display_df, log
|
240 |
|
241 |
+
|
242 |
def plot_properties_dashboard(df: pd.DataFrame):
|
243 |
+
"""Creates a professional 2x2 dashboard of molecular property visualizations using Bokeh."""
|
244 |
+
from math import pi, cos, sin
|
245 |
if df.empty or 'Drug_Like' not in df.columns:
|
246 |
return None, "Cannot plot: No analysis data or 'Drug_Like' column missing."
|
247 |
|
|
|
260 |
])
|
261 |
|
262 |
plot_config = {
|
263 |
+
'sizing_mode': 'scale_width', 'aspect_ratio': 1,
|
264 |
+
'background_fill_color': None, 'border_fill_color': None,
|
265 |
+
'outline_line_color': '#333333', 'min_border_left': 50,
|
266 |
+
'min_border_right': 50, 'min_border_top': 50, 'min_border_bottom': 50
|
267 |
}
|
268 |
|
269 |
def style_plot(p, x_label, y_label, title):
    """Apply consistent professional styling to plots.

    Sets the title text and font, the axis labels and their colours/sizes,
    the grid line colour/alpha and, when ``p.legend`` is truthy, the legend
    placement, colours and click policy.

    Args:
        p: Plot object to style; it is modified in place.
        x_label: Text assigned to ``p.xaxis.axis_label``.
        y_label: Text assigned to ``p.yaxis.axis_label``.
        title: Text assigned to ``p.title.text``.

    Returns:
        The same ``p`` object, after styling.
    """
    # Title
    p.title.text = title
    p.title.text_color = '#FFFFFF'
    p.title.text_font_size = '14pt'
    p.title.text_font_style = 'bold'

    # Axes
    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = y_label
    p.axis.axis_label_text_color = '#CCCCCC'
    p.axis.axis_label_text_font_size = '11pt'
    p.axis.major_label_text_color = '#AAAAAA'
    p.axis.major_label_text_font_size = '10pt'

    # Grid
    p.grid.grid_line_color = '#2A2A2A'
    p.grid.grid_line_alpha = 0.3

    # Legend styling is skipped when the plot reports no legend.
    if p.legend:
        p.legend.location = "top_right"
        p.legend.background_fill_color = '#1A1A1A'
        p.legend.background_fill_alpha = 0.8
        p.legend.border_line_color = '#444444'
        p.legend.label_text_color = '#FFFFFF'
        p.legend.click_policy = "mute"
    return p
|
294 |
|
295 |
+
p1 = figure(title="Molecular Weight vs LogP", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
|
296 |
+
p1.scatter('MW', 'LogP', source=source, legend_group='Category',
|
297 |
+
color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
|
298 |
+
p1.line([500, 500], [df['LogP'].min()-0.5, df['LogP'].max()+0.5], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="MW β€ 500")
|
299 |
+
p1.line([df['MW'].min()-50, df['MW'].max()+50], [5, 5], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="LogP β€ 5")
|
300 |
style_plot(p1, "Molecular Weight (Da)", "LogP", "Lipinski Rule: MW vs LogP")
|
301 |
|
302 |
+
p2 = figure(title="Hydrogen Bonding Profile", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
|
303 |
+
p2.scatter('HBD', 'HBA', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
|
304 |
+
p2.line([5, 5], [df['HBA'].min()-1, df['HBA'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="HBD β€ 5")
|
305 |
+
p2.line([df['HBD'].min()-1, df['HBD'].max()+1], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="HBA β€ 10")
|
306 |
style_plot(p2, "Hydrogen Bond Donors", "Hydrogen Bond Acceptors", "Lipinski Rule: Hydrogen Bonding")
|
307 |
|
308 |
+
p3 = figure(title="Molecular Flexibility & Polarity", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
|
309 |
+
p3.scatter('TPSA', 'RotBonds', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
|
310 |
+
p3.line([140, 140], [df['RotBonds'].min()-1, df['RotBonds'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="TPSA β€ 140")
|
311 |
+
p3.line([df['TPSA'].min()-10, df['TPSA'].max()+10], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="RotBonds β€ 10")
|
312 |
style_plot(p3, "Topological Polar Surface Area (Γ
Β²)", "Rotatable Bonds", "Drug Permeability Indicators")
|
313 |
|
314 |
p4_config = plot_config.copy()
|
315 |
+
p4_config['tools'] = "hover"
|
316 |
+
p4_config.update({'x_range': (-1.0, 1.0), 'y_range': (-1.0, 1.0)})
|
317 |
p4 = figure(title="Drug-Likeness Distribution", **p4_config)
|
318 |
|
319 |
+
# Calculate percentages for the doughnut chart
|
320 |
counts = df['Category'].value_counts()
|
321 |
+
data = pd.DataFrame({'category': counts.index, 'value': counts.values})
|
|
|
322 |
data['angle'] = data['value']/data['value'].sum() * 2*pi
|
323 |
+
data['color'] = [colors[0] if cat == 'Drug-Like' else colors[1] for cat in counts.index]
|
324 |
data['percentage'] = (data['value'] / data['value'].sum() * 100).round(1)
|
325 |
+
|
326 |
+
# Calculate overall drug-like percentage for central text
|
327 |
+
total_compounds = len(df)
|
328 |
+
drug_like_count = df['Drug_Like'].sum()
|
329 |
+
drug_like_percentage = (drug_like_count / total_compounds * 100) if total_compounds > 0 else 0
|
330 |
|
331 |
wedge_renderer = p4.annular_wedge(x=0, y=0, inner_radius=0.25, outer_radius=0.45,
|
332 |
start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
|
333 |
+
line_color="white", line_width=3, fill_color='color',
|
334 |
+
legend_field='category', source=data)
|
335 |
+
|
336 |
+
# Updated HoverTool to display percentage
|
337 |
+
donut_hover = HoverTool(tooltips=[
|
338 |
+
("Category", "@category"),
|
339 |
+
("Count", "@value"),
|
340 |
+
("Percentage", "@percentage{%0.1f}%%") # Display percentage with one decimal place
|
341 |
+
], renderers=[wedge_renderer])
|
342 |
+
p4.add_tools(donut_hover)
|
343 |
|
344 |
+
# Updated central text to show Drug-Like percentage
|
345 |
+
p4.text([0], [0], text=[f"{total_compounds}\nCompounds\n({drug_like_percentage:.1f}% Drug-Like)"],
|
346 |
text_align="center", text_baseline="middle", text_color="white", text_font_size="10pt", text_font_style="bold")
|
347 |
|
348 |
style_plot(p4, "", "", "Compound Classification")
|
|
|
354 |
|
355 |
# ===== Phase 2 Functions =====
|
356 |
def get_phase2_molecules():
    """
    Returns an expanded list of common drugs with SMILES for virtual screening.
    These are chosen to be well-known and diverse in their therapeutic areas.

    Returns:
        dict mapping a display name ("Drug (Indication)") to a SMILES string.

    NOTE(review): several SMILES below (e.g. Atorvastatin, Warfarin,
    Furosemide, Sildenafil, Omeprazole, Losartan) do not look like the
    reference structures for the named drugs -- verify against a curated
    source such as PubChem or ChEMBL before trusting computed properties.
    """
    return {
        'Paracetamol (Analgesic)': 'CC(=O)Nc1ccc(O)cc1',
        'Ibuprofen (Pain/Inflammation)': 'CC(C)Cc1ccc(C(C)C(=O)O)cc1',
        'Aspirin (Pain/Antiplatelet)': 'CC(=O)Oc1ccccc1C(=O)O',
        'Naproxen (Pain/Inflammation)': 'C[C@H](C(=O)O)c1ccc2cc(OC)ccc2c1',
        'Diazepam (Anxiolytic)': 'CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc12',
        'Metformin (Diabetes)': 'CN(C)C(=N)N=C(N)N',
        'Loratadine (Antihistamine)': 'CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1',
        'Morphine (Opioid Analgesic)': 'C[N@]1CC[C@]23c4c5ccc(O)c4O[C@H]2[C@@H](O)C=C[C@H]3[C@H]1C5',
        'Cetirizine (Antihistamine)': 'O=C(O)COCCOc1ccc(cc1)C(c1ccccc1)N1CCN(CC1)CCO',
        'Fluoxetine (Antidepressant)': 'CNCCC(c1ccccc1)Oc1ccc(C(F)(F)F)cc1',
        'Amoxicillin (Antibiotic)': 'C[C@@]1([C@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](N)c3ccc(O)cc3)C(=O)O)C',
        'Atorvastatin (Cholesterol)': 'CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1',
        'Ciprofloxacin (Antibiotic)': 'O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc12',
        'Warfarin (Anticoagulant)': 'O=C(c1ccccc1)C(c1oc2ccccc2c1=O)C',
        'Furosemide (Diuretic)': 'O=C(O)c1cc(Cl)c(NC2CO2)c(c1)S(=O)(=O)N',
        'Sildenafil (Erectile Dysfunction)': 'CCCC1=NN(C)C(=NC1=O)c1cc(N2CCN(C)CC2)c(OC)cc1S(=O)(=O)C',
        'Omeprazole (GERD)': 'COc1ccc(C)c(c1NC(=O)c1cn(Cc2ccc(OC)cc2)cn1)OC',  # Proton pump inhibitor
        'Losartan (Hypertension)': 'Cc1cnc(n1C)c1ccc(cc1)-c1ccccc1COC(=O)c1ccccc1',  # Angiotensin Receptor Blocker
    }
|
381 |
|
382 |
+
|
383 |
def simulate_virtual_screening(smiles_dict: dict):
|
384 |
np.random.seed(42)
|
385 |
scores = np.random.uniform(2.0, 9.8, len(smiles_dict))
|
|
|
404 |
log += f"β
Predicted ADMET properties for {len(df)} molecules.\n"
|
405 |
return df, log
|
406 |
|
407 |
+
def visualize_molecule_2d_3d(smiles: str, name: str):
|
408 |
+
"""Generates a side-by-side 2D SVG and 3D py3Dmol HTML view for a single molecule."""
|
409 |
+
log = ""
|
|
|
410 |
try:
|
411 |
mol = Chem.MolFromSmiles(smiles)
|
412 |
+
if not mol: return f"<p>Invalid SMILES for {name}</p>", f"β Invalid SMILES for {name}"
|
413 |
+
|
|
|
414 |
drawer = Draw.rdMolDraw2D.MolDraw2DSVG(400, 300)
|
415 |
+
# Set dark theme colors for 2D drawing
|
416 |
drawer.drawOptions().clearBackground = False
|
417 |
+
drawer.drawOptions().addStereoAnnotation = True
|
418 |
+
drawer.drawOptions().baseFontSize = 0.8
|
419 |
+
drawer.drawOptions().circleAtoms = False
|
420 |
+
drawer.drawOptions().highlightColour = (1, 0.5, 0) # Orange for highlights
|
421 |
+
|
422 |
+
# Set colors for dark background visibility
|
423 |
+
drawer.drawOptions().backgroundColour = (0.11, 0.11, 0.11) # Dark background
|
424 |
+
drawer.drawOptions().symbolColour = (1, 1, 1) # White symbols
|
425 |
+
drawer.drawOptions().defaultColour = (1, 1, 1) # White default color
|
426 |
+
|
427 |
+
# Try to set annotation color (this might help with (R)/(S) labels)
|
428 |
+
try:
|
429 |
+
drawer.drawOptions().annotationColour = (1, 1, 1) # White annotations
|
430 |
+
except:
|
431 |
+
pass
|
432 |
+
|
433 |
drawer.DrawMolecule(mol)
|
434 |
drawer.FinishDrawing()
|
435 |
svg_2d = drawer.GetDrawingText().replace('svg:', '')
|
|
|
|
|
436 |
|
437 |
+
# More aggressive SVG text color fixes - target all possible black text variations
|
438 |
+
|
439 |
+
# First, comprehensive string replacements
|
440 |
+
svg_2d = svg_2d.replace('stroke="black"', 'stroke="white"')
|
441 |
+
svg_2d = svg_2d.replace('fill="black"', 'fill="white"')
|
442 |
+
svg_2d = svg_2d.replace('stroke="#000000"', 'stroke="#FFFFFF"')
|
443 |
+
svg_2d = svg_2d.replace('fill="#000000"', 'fill="#FFFFFF"')
|
444 |
+
svg_2d = svg_2d.replace('stroke="#000"', 'stroke="#FFF"')
|
445 |
+
svg_2d = svg_2d.replace('fill="#000"', 'fill="#FFF"')
|
446 |
+
svg_2d = svg_2d.replace('stroke:black', 'stroke:white')
|
447 |
+
svg_2d = svg_2d.replace('fill:black', 'fill:white')
|
448 |
+
svg_2d = svg_2d.replace('stroke:#000000', 'stroke:#FFFFFF')
|
449 |
+
svg_2d = svg_2d.replace('fill:#000000', 'fill:#FFFFFF')
|
450 |
+
svg_2d = svg_2d.replace('stroke:#000', 'stroke:#FFF')
|
451 |
+
svg_2d = svg_2d.replace('fill:#000', 'fill="#FFF"')
|
452 |
+
svg_2d = svg_2d.replace('stroke="rgb(0,0,0)"', 'stroke="rgb(255,255,255)"')
|
453 |
+
svg_2d = svg_2d.replace('fill="rgb(0,0,0)"', 'fill="rgb(255,255,255)"')
|
454 |
+
svg_2d = svg_2d.replace('stroke:rgb(0,0,0)', 'stroke:rgb(255,255,255)')
|
455 |
+
svg_2d = svg_2d.replace('fill:rgb(0,0,0)', 'fill:rgb(255,255,255)')
|
456 |
+
svg_2d = svg_2d.replace('color="black"', 'color="white"')
|
457 |
+
svg_2d = svg_2d.replace('color:#000000', 'color:#FFFFFF')
|
458 |
+
svg_2d = svg_2d.replace('color:#000', 'color:#FFF')
|
459 |
+
|
460 |
+
# Aggressive regex-based fixes for all text elements
|
461 |
+
# Remove any existing fill attributes from text elements and add white fill
|
462 |
+
svg_2d = re.sub(r'<text([^>]*?)\s+fill="[^"]*"([^>]*?)>', r'<text\1\2 fill="white">', svg_2d)
|
463 |
+
svg_2d = re.sub(r'<text([^>]*?)(?<!fill="white")>', r'<text\1 fill="white">', svg_2d)
|
464 |
+
|
465 |
+
# Fix style attributes in text elements
|
466 |
+
svg_2d = re.sub(r'<text([^>]*?)style="([^"]*?)fill:\s*(?:black|#000000|#000|rgb\(0,0,0\))([^"]*?)"([^>]*?)>',
|
467 |
+
r'<text\1style="\2fill:white\3"\4>', svg_2d)
|
468 |
+
|
469 |
+
# If text elements don't have any fill specified, ensure they get white
|
470 |
+
svg_2d = re.sub(r'<text(?![^>]*fill=)([^>]*?)>', r'<text fill="white"\1>', svg_2d)
|
471 |
+
|
472 |
+
# Clean up any duplicate fill attributes
|
473 |
+
svg_2d = re.sub(r'fill="white"\s+fill="white"', 'fill="white"', svg_2d)
|
474 |
+
|
475 |
+
# Final catch-all: replace any remaining black in the entire SVG
|
476 |
+
svg_2d = re.sub(r'\bblack\b', 'white', svg_2d)
|
477 |
+
svg_2d = re.sub(r'#000000', '#FFFFFF', svg_2d)
|
478 |
+
svg_2d = re.sub(r'#000\b', '#FFF', svg_2d)
|
479 |
+
svg_2d = re.sub(r'rgb\(0,\s*0,\s*0\)', 'rgb(255,255,255)', svg_2d)
|
480 |
+
|
481 |
+
# Embed the SVG within a div with a dark background for consistency
|
482 |
+
svg_2d = f'<div style="background-color: #1C1C1C; padding: 10px; border-radius: 5px;">{svg_2d}</div>'
|
483 |
+
|
484 |
mol_3d = Chem.AddHs(mol)
|
485 |
AllChem.EmbedMolecule(mol_3d, randomSeed=42)
|
486 |
AllChem.MMFFOptimizeMolecule(mol_3d)
|
|
|
491 |
viewer.addModel(sdf_data, "sdf")
|
492 |
viewer.setStyle({'stick': {}, 'sphere': {'scale': 0.25}})
|
493 |
viewer.zoomTo()
|
494 |
+
html_3d = viewer._make_html()
|
495 |
+
|
496 |
+
combined_html = f"""
|
497 |
+
<div style="display: flex; flex-direction: row; align-items: center; justify-content: space-around; border: 1px solid #444; border-radius: 10px; padding: 10px; margin-bottom: 10px; background-color: #2b2b2b;">
|
498 |
+
<div style="text-align: center;">
|
499 |
+
<h4 style="color: white; font-family: 'Roboto', sans-serif;">{name} (2D Structure)</h4>
|
500 |
+
{svg_2d}
|
501 |
+
</div>
|
502 |
+
<div style="text-align: center;">
|
503 |
+
<h4 style="color: white; font-family: 'Roboto', sans-serif;">{name} (3D Interactive)</h4>
|
504 |
+
{html_3d}
|
505 |
+
</div>
|
506 |
+
</div>
|
507 |
+
"""
|
508 |
+
log += f"β
Generated 2D/3D view for {name}.\n"
|
509 |
+
return combined_html, log
|
510 |
except Exception as e:
|
511 |
+
return f"<p>Error visualizing {name}: {e}</p>", f"β Error visualizing {name}: {e}"
|
512 |
|
|
|
513 |
def visualize_protein_ligand_interaction(pdb_data: str, pdb_id: str, ligand_resn: str):
    """
    Generates a protein-ligand interaction visualization using py3Dmol.

    Args:
        pdb_data: PDB-format text of the structure; falsy input short-circuits.
        pdb_id: Identifier of the structure; only used in the log message.
        ligand_resn: Residue name used to select the ligand. When falsy, no
            ligand styling/surface is added and the camera frames the whole model.

    Returns:
        tuple: ``(html, log)`` where ``html`` is the viewer markup produced by
        ``viewer._make_html()`` (or ``None`` on failure) and ``log`` is a status
        message. Errors are reported through ``log`` rather than raised.
    """
    if not pdb_data:
        return None, "Cannot generate interaction view: No PDB data provided."

    try:
        viewer = py3Dmol.view(width='100%', height=650)
        viewer.setBackgroundColor('#1C1C1C')

        # Add the protein structure
        viewer.addModel(pdb_data, "pdb")

        # Style the protein (cartoon representation)
        viewer.setStyle({'cartoon': {'color': 'lightblue', 'opacity': 0.8}})

        # Highlight the ligand if specified
        if ligand_resn:
            ligand_selection = {'resn': ligand_resn}
            viewer.addStyle(ligand_selection, {'stick': {'colorscheme': 'greenCarbon', 'radius': 0.2}})
            viewer.addStyle(ligand_selection, {'sphere': {'scale': 0.3, 'colorscheme': 'greenCarbon'}})

            # Add surface representation for binding site.
            # Kept under the ligand guard: the surface is selected by the same
            # residue name, so it is only meaningful when one was supplied.
            viewer.addSurface(py3Dmol.VDW, {'opacity': 0.2, 'color': 'white'}, ligand_selection)

        viewer.zoomTo({'resn': ligand_resn} if ligand_resn else {})

        # NOTE: _make_html() is a private py3Dmol method; it is what the rest of
        # this file uses to embed viewers, so it is kept for consistency.
        html = viewer._make_html()
        log = f"✅ Generated protein-ligand interaction view for {pdb_id} with ligand {ligand_resn}."
        return html, log

    except Exception as e:
        # Best-effort UI helper: surface the failure in the log instead of crashing the app.
        return None, f"❌ Interaction visualization error: {e}"
|
546 |
|
547 |
# ===== Phase 3 Functions =====
|
548 |
def get_phase3_molecules():
    """
    Return the Phase 3 lead-optimization set as a ``{label: SMILES}`` dict.

    Keys are the labels offered in the "lead compounds" multiselect; values are
    the SMILES strings used for property, toxicity and 2D/3D rendering steps.

    NOTE(review): some strings do not appear to match the named drug -- the
    Imatinib string contains a sulfur atom, the Sorafenib string has no
    fluorine and the Dasatinib string has no chlorine. Confirm each entry
    against PubChem/ChEMBL before relying on computed properties.

    Returns:
        dict[str, str]: a fresh dictionary on every call.
    """
    return {
        'Oseltamivir (Influenza)': 'CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C',
        'Aspirin (Pain/Antiplatelet)': 'CC(=O)OC1=CC=CC=C1C(=O)O',
        'Remdesivir (Antiviral)': 'CCC(CC)COC(=O)[C@@H](C)N[P@](=O)(OC[C@@H]1O[C@](C#N)([C@H]([C@@H]1O)O)C2=CC=C3N2N=CN=C3N)OC4=CC=CC=C4',
        'Penicillin G (Antibiotic)': 'CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C',
        "Imatinib (Gleevec - Cancer)": "Cc1ccc(NC(=O)c2cnc(C)s2)cc1-c1cnc(Nc2ccc(CN)cc2)nc1",
        "Sorafenib (Kinase Inhibitor)": "Clc1cccc(Cl)c1OC(=O)Nc1ccc(nc1)NC(=O)C(C)(C)C",  # Multi-kinase inhibitor for cancer
        # SMILES for Venetoclax was corrected in an earlier revision
        "Venetoclax (BCL-2 Inhibitor)": "CC1(CCC(=C(C1)C2=CC=C(C=C2)Cl)CN3CCN(CC3)C4=CC(=C(C=C4)C(=O)NS(=O)(=O)C5=CC(=C(C=C5)NCC6CCOCC6)[N+](=O)[O-])OC7=CN=C8C(=C7)C=CN8)C",
        "Dasatinib (Kinase Inhibitor)": "CC1=NC(=NC=C1SC2=NC=C(C=N2)C(=O)NC3=CC=CC(=C3)N)C(=O)O",  # Multi-kinase inhibitor for leukemia
    }
|
564 |
|
565 |
def calculate_comprehensive_properties(smiles_dict: dict):
|
|
|
604 |
# ===== Phase 4 Functions =====
|
605 |
def get_regulatory_summary():
    """
    Build the static AI/ML documentation summary shown in Phase 4.

    Returns:
        tuple: ``(DataFrame, log)`` -- a four-row table with ``Component`` and
        ``Description`` columns, and a status message (no trailing newline).
    """
    summary = {
        'Component': ['Data Governance', 'Model Architecture', 'Model Validation', 'Interpretability'],
        'Description': [
            'Data sourced from ChEMBL, PDB, GISAID. Bias assessed via geographic distribution analysis.',
            'Graph Convolutional Network (Target ID), Random Forest (ADMET), K-Means (Patient Stratification).',
            'ADMET Model validated with AUC-ROC > 0.85 on an independent test set.',
            'SHAP used for patient stratification model outputs.',
        ],
    }
    return pd.DataFrame(summary), "✅ Generated AI/ML documentation summary."
|
612 |
|
613 |
def simulate_rwd_analysis(adverse_event_text):
    """
    Analyzes simulated adverse event text and generates a DataFrame and Bokeh plot.

    A fixed-seed sample of 100 baseline events is combined with every word of
    three or more ASCII letters found in ``adverse_event_text`` (lower-cased);
    the ten most frequent terms are tabulated and drawn as a horizontal bar chart.

    Args:
        adverse_event_text: Free-text report. ``None`` or an empty string is
            treated as "no user-supplied terms".

    Returns:
        tuple: ``(results_df, plot, log)`` -- a DataFrame with ``Adverse_Event``
        and ``Frequency`` columns, the Bokeh figure, and a status message.
    """
    # Fixed seed so the simulated baseline is reproducible between runs.
    np.random.seed(42)
    base_events = list(np.random.choice(
        ['headache', 'nausea', 'fatigue', 'dizziness', 'rash', 'fever', 'diarrhea', 'constipation', 'insomnia', 'muscle pain'],
        100,
        p=[0.2, 0.15, 0.12, 0.12, 0.1, 0.08, 0.08, 0.05, 0.05, 0.05]  # Adjusted probabilities for new events
    ))

    # Guard against None so an unset input does not crash re.findall.
    report_text = adverse_event_text or ""
    user_terms = [word.lower() for word in re.findall(r'\b[a-zA-Z]{3,}\b', report_text)]

    all_events = base_events + user_terms

    events_df = pd.DataFrame(all_events, columns=['Adverse_Event'])
    event_counts = events_df['Adverse_Event'].value_counts().nlargest(10).sort_values(ascending=False)

    results_df = event_counts.reset_index()
    results_df.columns = ['Adverse_Event', 'Frequency']

    log = f"✅ Analyzed {len(all_events)} total event reports. Identified {len(event_counts)} unique adverse events for plotting.\n"

    # Create Bokeh Plot
    source = ColumnDataSource(results_df)
    # Reverse the categories so the most frequent event is drawn at the top.
    y_range = results_df['Adverse_Event'].tolist()[::-1]

    hover = HoverTool(tooltips=[("Event", "@Adverse_Event"), ("Frequency", "@Frequency")])

    p = figure(
        y_range=y_range, height=450, title="Top 10 Reported Adverse Events",
        sizing_mode='stretch_width', tools="pan,wheel_zoom,box_zoom,reset,save",
    )
    p.add_tools(hover)

    p.hbar(
        y='Adverse_Event', right='Frequency', source=source, height=0.7,
        color='#00A0FF', line_color='white', legend_label="Event Frequency"
    )

    # Style the plot for a dark theme
    p.background_fill_color = "#1C1C1C"
    p.border_fill_color = "#1C1C1C"
    p.outline_line_color = '#333333'
    p.title.text_color = "white"
    p.title.text_font_size = '16pt'
    p.title.align = "center"
    p.xaxis.axis_label = "Frequency Count"
    p.yaxis.axis_label = "Adverse Event"
    p.axis.axis_label_text_color = "#CCCCCC"
    p.axis.axis_label_text_font_size = "12pt"
    p.axis.major_label_text_color = "#AAAAAA"
    p.axis.major_label_text_font_size = "10pt"
    p.grid.grid_line_alpha = 0.3
    p.grid.grid_line_color = "#444444"
    p.x_range.start = 0
    p.legend.location = "top_right"
    p.legend.background_fill_color = "#2A2A2A"
    p.legend.background_fill_alpha = 0.7
    p.legend.border_line_color = "#444444"
    p.legend.label_text_color = "white"

    return results_df, p, log
|
676 |
|
677 |
def get_ethical_framework():
    """
    Build the static Ethical AI Framework table shown in Phase 4.

    Returns:
        tuple: ``(DataFrame, log)`` -- a four-row table with ``Principle`` and
        ``Implementation Strategy`` columns, and a status message (no trailing
        newline).
    """
    framework = {
        'Principle': ['Beneficence', 'Non-maleficence', 'Fairness', 'Transparency'],
        'Implementation Strategy': [
            'AI models prioritize patient outcomes and clinical efficacy.',
            'Toxicity prediction and pharmacovigilance models aim to minimize patient harm.',
            'Algorithms are audited for demographic bias in training data and predictions.',
            'Model cards and SHAP values are provided for key decision-making processes.',
        ],
    }
    return pd.DataFrame(framework), "✅ Generated Ethical AI Framework summary."
|
686 |
|
687 |
# --- 3. Streamlit UI Layout ---
|
688 |
+
|
689 |
+
# Initialize session state variables.
# Streamlit re-runs this script on every interaction, so each key is only
# seeded when it is missing; values written by a previous run are kept.
_SESSION_TEXT_DEFAULTS = {
    'active_tab': "Phase 1: Target Identification",
    'log_p1': "Status logs will appear here.",
    'log_p2': "Status logs will appear here.",
    'log_p3': "Status logs will appear here.",
    'log_p4': "Status logs will appear here.",
}
for _state_key, _state_default in _SESSION_TEXT_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Per-phase result stores; every phase tab reads its own store, so all four
# are seeded here. A new dict is created per key so they never share state.
for _state_key in ('results_p1', 'results_p2', 'results_p3', 'results_p4'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = {}
|
699 |
|
700 |
+
# --- Header ---
st.title("🔬 AI-Powered Drug Discovery Pipeline")
st.markdown("An integrated application demonstrating a four-phase computational drug discovery workflow.")

# --- Main Tabs for Each Phase ---
# One tab per pipeline phase; the tab labels use Markdown bold for the phase number.
tab1, tab2, tab3, tab4 = st.tabs([
    "**Phase 1:** Target Identification",
    "**Phase 2:** Hit Discovery & ADMET",
    "**Phase 3:** Lead Optimization",
    "**Phase 4:** Pre-clinical & RWE",
])
|
711 |
|
712 |
# --- Phase 1: Target Identification ---
# Inputs -> "Run" button -> results cached in st.session_state.results_p1 so they
# survive Streamlit's script re-runs.
with tab1:
    st.header("Phase 1: Target Identification & Initial Analysis")
    st.markdown("""
    In this initial phase, we identify and analyze a biological target (e.g., a protein) implicated in a disease.
    We fetch its 3D structure and sequence data, then evaluate a set of initial compounds for their drug-like properties.
    """)

    st.subheader("Inputs & Controls")

    # Updated PDB ID options
    pdb_options = {
        "Neuraminidase (Influenza - 2HU4)": "2HU4",
        "KRAS G12D (Oncogenic Target - 7XKJ)": "7XKJ",  # Bound to MRTX-1133
        "SARS-CoV-2 Mpro (Antiviral Target - 8HUR)": "8HUR",  # Bound to Ensitrelvir
        "EGFR Kinase (Cancer Target - 1M17)": "1M17",  # Bound to Erlotinib
    }
    selected_pdb_name = st.selectbox("Select PDB ID:", options=list(pdb_options.keys()), index=0)
    pdb_id_input = pdb_options[selected_pdb_name]

    # Updated NCBI Protein ID options
    protein_options = {
        "Neuraminidase (P03468)": "P03468",  # Influenza A virus (A/PR/8/34)
        "KRAS (P01116)": "P01116",  # Human KRAS
        "SARS-CoV-2 Main Protease (P0DTD1)": "P0DTD1",  # SARS-CoV-2 Mpro
        "EGFR (P00533)": "P00533",  # Human Epidermal Growth Factor Receptor
    }
    selected_protein_name = st.selectbox("Select NCBI Protein ID:", options=list(protein_options.keys()), index=0)
    protein_id_input = protein_options[selected_protein_name]

    st.markdown("---")
    st.write("**Analyze Sample Compounds:**")
    sample_molecules = create_sample_molecules()
    preferred_defaults = ["Oseltamivir (Influenza)", "Aspirin (Pain/Inflammation)", "Imatinib (Gleevec - Cancer)"]
    selected_molecules = st.multiselect(
        "Select from known drugs:",
        options=list(sample_molecules.keys()),
        # st.multiselect raises when a default is not among the options, so keep
        # only the preferred names that the sample library actually contains.
        default=[name for name in preferred_defaults if name in sample_molecules],
    )

    # NOTE(review): the button emoji was mojibake in the source; 🚀 is assumed.
    if st.button("🚀 Run Phase 1 Analysis", key="run_p1"):
        with st.spinner("Fetching data and calculating properties..."):
            full_log = "--- Phase 1 Analysis Started ---\n"

            pdb_data, log_pdb = fetch_pdb_structure(pdb_id_input)
            full_log += log_pdb
            log_fasta = fetch_fasta_sequence(protein_id_input)
            full_log += log_fasta

            smiles_to_analyze = {name: sample_molecules[name] for name in selected_molecules}
            properties_df, log_props = calculate_molecular_properties(smiles_to_analyze)
            full_log += log_props

            analysis_df, display_df, log_likeness = assess_drug_likeness(properties_df)
            full_log += log_likeness

            # NOTE(review): confirm visualize_protein_3d accepts a `title` keyword;
            # its definition is outside this section of the file.
            protein_view_html, log_3d = visualize_protein_3d(pdb_data, title=f"PDB: {pdb_id_input}")
            full_log += log_3d

            dashboard_plot, log_dash = plot_properties_dashboard(analysis_df)
            full_log += log_dash

            full_log += "\n--- Phase 1 Analysis Complete ---"
            st.session_state.log_p1 = full_log

            st.session_state.results_p1 = {
                # Remember which structure was analysed so the results header
                # stays correct if the select box is changed afterwards.
                'pdb_id': pdb_id_input,
                'pdb_data': pdb_data,
                'protein_view': protein_view_html,
                'properties_df': display_df,
                'dashboard': dashboard_plot,
            }

    st.text_area("Status & Logs", st.session_state.log_p1, height=200, key="log_p1_area")

    st.subheader("Results")
    if not st.session_state.results_p1:
        st.info("Click 'Run Phase 1 Analysis' to generate and display results.")
    else:
        res1 = st.session_state.results_p1
        p1_tabs = st.tabs(["Protein Structure", "Compound Properties Dashboard"])

        with p1_tabs[0]:
            st.subheader(f"3D Structure for PDB ID: {res1.get('pdb_id', pdb_id_input)}")
            if res1.get('protein_view'):
                st.components.v1.html(res1['protein_view'], height=600, scrolling=False)
            else:
                st.warning("Could not display 3D structure. Check PDB ID and logs.")

        with p1_tabs[1]:
            st.subheader("Physicochemical Properties Analysis")
            # The data table is displayed before the dashboard.
            st.dataframe(res1.get('properties_df', pd.DataFrame()), use_container_width=True, hide_index=True)
            if res1.get('dashboard'):
                st.bokeh_chart(res1['dashboard'], use_container_width=True)
|
805 |
|
806 |
+
|
807 |
# --- Phase 2: Hit Discovery & ADMET ---
# Screens the Phase 2 library, merges screening and ADMET tables, and renders a
# protein-ligand view; results are cached in st.session_state.results_p2.
with tab2:
    st.header("Phase 2: Virtual Screening & Early ADMET")
    st.markdown("""
    This phase simulates a virtual screening process to identify 'hits' from a larger library of compounds.
    We predict their binding affinity to the target and assess their basic ADMET (Absorption, Distribution,
    Metabolism, Excretion, Toxicity) profiles.
    """)

    st.subheader("Inputs & Controls")

    p2_molecules = get_phase2_molecules()
    st.info(f"A library of {len(p2_molecules)} compounds is ready for screening.")

    # Updated PDB ID for Interaction options.
    # NOTE(review): ligand residue names must match the HETATM codes in each PDB
    # entry or nothing is highlighted; verify "M13", "X77" and "ERL" against RCSB
    # (1M17's erlotinib is commonly listed under a different het code).
    interaction_pdb_options = {
        "Neuraminidase + Oseltamivir (2HU4)": {"pdb": "2HU4", "ligand": "G39"},
        # Label aligned with the Phase 1 entry for 7XKJ (KRAS G12D, bound to MRTX-1133).
        "KRAS G12D + MRTX-1133 (7XKJ)": {"pdb": "7XKJ", "ligand": "M13"},
        "SARS-CoV-2 Mpro + Ensitrelvir (8HUR)": {"pdb": "8HUR", "ligand": "X77"},
        "EGFR + Erlotinib (1M17)": {"pdb": "1M17", "ligand": "ERL"},
    }
    selected_interaction_pdb_name = st.selectbox(
        "Select PDB ID for Interaction:",
        options=list(interaction_pdb_options.keys()),
        index=0  # Default to Neuraminidase
    )
    p2_pdb_id = interaction_pdb_options[selected_interaction_pdb_name]["pdb"]
    p2_ligand_resn = interaction_pdb_options[selected_interaction_pdb_name]["ligand"]

    st.write(f"Selected PDB: `{p2_pdb_id}`, Selected Ligand Residue Name: `{p2_ligand_resn}`")

    # NOTE(review): the button emoji was mojibake in the source; 🚀 is assumed.
    if st.button("🚀 Run Phase 2 Analysis", key="run_p2"):
        with st.spinner("Running virtual screening and ADMET predictions..."):
            full_log = "--- Phase 2 Analysis Started ---\n"

            screening_df, log_screen = simulate_virtual_screening(p2_molecules)
            full_log += log_screen
            admet_df, log_admet = predict_admet_properties(p2_molecules)
            full_log += log_admet

            merged_df = pd.merge(screening_df, admet_df, on="Molecule")

            pdb_data, log_pdb_p2 = fetch_pdb_structure(p2_pdb_id)
            full_log += log_pdb_p2

            interaction_view, log_interact = visualize_protein_ligand_interaction(pdb_data, p2_pdb_id, p2_ligand_resn)
            full_log += log_interact

            full_log += "\n--- Phase 2 Analysis Complete ---"
            st.session_state.log_p2 = full_log
            st.session_state.results_p2 = {
                'merged_df': merged_df,
                'interaction_view': interaction_view,
                # Remember what was rendered so the results header does not
                # drift when the select box is changed after a run.
                'pdb_id': p2_pdb_id,
                'ligand_resn': p2_ligand_resn,
            }

    st.text_area("Status & Logs", st.session_state.log_p2, height=200, key="log_p2_area")

    st.subheader("Results")
    if not st.session_state.results_p2:
        st.info("Click 'Run Phase 2 Analysis' to generate and display results.")
    else:
        res2 = st.session_state.results_p2
        p2_tabs = st.tabs(["Screening & ADMET Results", "Protein-Ligand Interaction"])

        with p2_tabs[0]:
            st.subheader("Virtual Screening & Early ADMET Predictions")
            st.dataframe(res2.get('merged_df', pd.DataFrame()), use_container_width=True, hide_index=True)

        with p2_tabs[1]:
            shown_pdb_id = res2.get('pdb_id', p2_pdb_id)
            shown_ligand = res2.get('ligand_resn', p2_ligand_resn)
            st.subheader(f"Simulated Interaction for PDB {shown_pdb_id} with Ligand {shown_ligand}")
            if res2.get('interaction_view'):
                st.components.v1.html(res2['interaction_view'], height=700, scrolling=False)
            else:
                st.warning("Could not display interaction view. Check inputs and logs.")
|
882 |
+
|
883 |
# --- Phase 3: Lead Optimization ---
# Computes detailed properties and a toxicity prediction for the selected leads
# and renders a 2D/3D view per compound; results live in st.session_state.results_p3.
with tab3:
    st.header("Phase 3: Lead Compound Optimization")
    st.markdown("""
    In lead optimization, promising 'hit' compounds are refined to improve their efficacy and safety.
    Here, we analyze a few selected lead candidates, perform more detailed property calculations,
    and predict their toxicity risk using a simulated machine learning model.
    """)

    st.subheader("Inputs & Controls")

    p3_molecules = get_phase3_molecules()
    selected_leads = st.multiselect(
        "Select lead compounds to optimize:",
        options=list(p3_molecules.keys()),
        default=['Oseltamivir (Influenza)', 'Remdesivir (Antiviral)', 'Imatinib (Gleevec - Cancer)']  # Adjusted default selection
    )

    # NOTE(review): the button emoji was mojibake in the source; 🚀 is assumed.
    if st.button("🚀 Run Phase 3 Analysis", key="run_p3"):
        if not selected_leads:
            # Nothing to analyse: skip the pipeline rather than feeding empty
            # inputs to the property/toxicity helpers and the merge below.
            st.warning("Select at least one lead compound before running the analysis.")
        else:
            with st.spinner("Analyzing lead compounds and predicting toxicity..."):
                full_log = "--- Phase 3 Analysis Started ---\n"

                smiles_to_analyze_p3 = {name: p3_molecules[name] for name in selected_leads}

                comp_props_df, log_comp = calculate_comprehensive_properties(smiles_to_analyze_p3)
                full_log += log_comp

                toxicity_df, log_tox = predict_toxicity(comp_props_df)
                full_log += log_tox

                final_df = pd.merge(comp_props_df, toxicity_df, on="Compound")

                # One combined 2D/3D HTML snippet per selected compound.
                visuals = {}
                for name, smiles in smiles_to_analyze_p3.items():
                    html_view, log_vis = visualize_molecule_2d_3d(smiles, name)
                    visuals[name] = html_view
                    full_log += log_vis

                full_log += "\n--- Phase 3 Analysis Complete ---"
                st.session_state.log_p3 = full_log
                st.session_state.results_p3 = {
                    'final_df': final_df,
                    'visuals': visuals,
                }

    st.text_area("Status & Logs", st.session_state.log_p3, height=200, key="log_p3_area")

    st.subheader("Results")
    if not st.session_state.results_p3:
        st.info("Click 'Run Phase 3 Analysis' to generate and display results.")
    else:
        res3 = st.session_state.results_p3
        st.subheader("Lead Compound Analysis & Toxicity Prediction")
        st.dataframe(res3.get('final_df', pd.DataFrame()), use_container_width=True, hide_index=True)

        st.subheader("2D & 3D Molecular Structures")
        # Each snippet already carries its compound name in its own headings.
        for visual_html in res3.get('visuals', {}).values():
            st.components.v1.html(visual_html, height=430, scrolling=False)
|
942 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
943 |
|
944 |
# --- Phase 4: Pre-clinical & RWE ---
# Runs the simulated pharmacovigilance analysis and shows the static regulatory
# and ethics tables; results are cached in st.session_state.results_p4.
with tab4:
    st.header("Phase 4: Simulated Pre-clinical & Real-World Evidence (RWE)")
    st.markdown("""
    This final phase simulates post-market analysis. We analyze text data for adverse events (pharmacovigilance)
    and present documentation related to the AI models and ethical frameworks that would be required for regulatory submission.
    """)

    st.subheader("Inputs & Controls")

    rwd_input = st.text_area(
        "Enter simulated adverse event report text:",
        "Patient reports include instances of headache, severe nausea, and occasional skin rash. Some noted dizziness after taking the medication.",
        height=150
    )

    # NOTE(review): the button emoji was mojibake in the source; 🚀 is assumed.
    if st.button("🚀 Run Phase 4 Analysis", key="run_p4"):
        with st.spinner("Analyzing real-world data and generating reports..."):
            full_log = "--- Phase 4 Analysis Started ---\n"

            reg_df, log_reg = get_regulatory_summary()
            eth_df, log_eth = get_ethical_framework()
            rwd_df, plot_bar, log_rwd = simulate_rwd_analysis(rwd_input)

            # Some helper messages carry no trailing newline; normalise so each
            # one lands on its own line in the log box.
            for log_part in (log_reg, log_eth, log_rwd):
                full_log += log_part if log_part.endswith("\n") else log_part + "\n"

            full_log += "\n--- Phase 4 Analysis Complete ---"
            st.session_state.log_p4 = full_log

            st.session_state.results_p4 = {
                'rwd_df': rwd_df,
                'plot_bar': plot_bar,
                'reg_df': reg_df,
                'eth_df': eth_df,
            }

    st.text_area("Status & Logs", st.session_state.log_p4, height=200, key="log_p4_area")

    st.subheader("Results")
    if not st.session_state.results_p4:
        st.info("Click 'Run Phase 4 Analysis' to generate and display results.")
    else:
        res4 = st.session_state.results_p4
        p4_tabs = st.tabs(["Pharmacovigilance Analysis", "Regulatory & Ethical Frameworks"])

        with p4_tabs[0]:
            st.subheader("Simulated Adverse Event Analysis")
            if res4.get('plot_bar') is not None:
                st.bokeh_chart(res4['plot_bar'], use_container_width=True)
            st.dataframe(res4.get('rwd_df', pd.DataFrame()), use_container_width=True, hide_index=True)

        with p4_tabs[1]:
            st.subheader("AI/ML Model Regulatory Summary")
            st.dataframe(res4.get('reg_df', pd.DataFrame()), use_container_width=True, hide_index=True)

            st.subheader("Ethical AI Framework")
            st.dataframe(res4.get('eth_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
1003 |
+
|