Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,8 @@
|
|
1 |
-
# app.py
|
2 |
# AI-Powered Drug Discovery Pipeline Streamlit Application
|
3 |
# This script integrates four phases of drug discovery into a single, interactive web app.
|
4 |
-
|
5 |
import streamlit as st
|
6 |
import pandas as pd
|
7 |
import numpy as np
|
8 |
-
import matplotlib.pyplot as plt
|
9 |
-
import seaborn as sns
|
10 |
import requests
|
11 |
import io
|
12 |
import re
|
@@ -41,16 +37,15 @@ st.set_page_config(
|
|
41 |
page_title="AI Drug Discovery Pipeline",
|
42 |
page_icon="π¬",
|
43 |
layout="wide",
|
44 |
-
initial_sidebar_state="collapsed",
|
45 |
)
|
46 |
|
47 |
-
# Custom CSS for a professional,
|
48 |
def apply_custom_styling():
|
49 |
st.markdown(
|
50 |
"""
|
51 |
<style>
|
52 |
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
|
53 |
-
|
54 |
html, body, [class*="st-"] {
|
55 |
font-family: 'Roboto', sans-serif;
|
56 |
}
|
@@ -92,6 +87,11 @@ def apply_custom_styling():
|
|
92 |
color: #FFF;
|
93 |
background-color: #00A0FF;
|
94 |
}
|
|
|
|
|
|
|
|
|
|
|
95 |
</style>
|
96 |
""",
|
97 |
unsafe_allow_html=True
|
@@ -189,7 +189,7 @@ def calculate_molecular_properties(smiles_dict: dict):
|
|
189 |
mol = Chem.MolFromSmiles(smiles)
|
190 |
if mol:
|
191 |
props = {
|
192 |
-
'Molecule': name,
|
193 |
'SMILES': smiles,
|
194 |
'MW': Descriptors.MolWt(mol),
|
195 |
'LogP': Descriptors.MolLogP(mol),
|
@@ -214,7 +214,6 @@ def assess_drug_likeness(df: pd.DataFrame):
|
|
214 |
if df.empty:
|
215 |
return pd.DataFrame(), pd.DataFrame(), "Cannot assess drug-likeness: No properties data."
|
216 |
|
217 |
-
# Create a copy for analysis to avoid modifying the original dataframe
|
218 |
analysis_df = df.copy()
|
219 |
analysis_df['MW_OK'] = analysis_df['MW'] <= 500
|
220 |
analysis_df['LogP_OK'] = analysis_df['LogP'] <= 5
|
@@ -222,17 +221,14 @@ def assess_drug_likeness(df: pd.DataFrame):
|
|
222 |
analysis_df['HBA_OK'] = analysis_df['HBA'] <= 10
|
223 |
analysis_df['Lipinski_Violations'] = (~analysis_df[['MW_OK', 'LogP_OK', 'HBD_OK', 'HBA_OK']]).sum(axis=1)
|
224 |
|
225 |
-
# This boolean column is for the plotting function
|
226 |
analysis_df['Drug_Like'] = analysis_df['Lipinski_Violations'] <= 1
|
227 |
|
228 |
-
# Create a separate DataFrame for display purposes with emojis
|
229 |
display_df = df.copy()
|
230 |
display_df['Lipinski_Violations'] = analysis_df['Lipinski_Violations']
|
231 |
display_df['Drug_Like'] = analysis_df['Drug_Like'].apply(lambda x: 'β
Yes' if x else 'β No')
|
232 |
|
233 |
log = "β
Assessed drug-likeness using Lipinski's Rule of Five.\n"
|
234 |
|
235 |
-
# Return both the analysis_df (for plotting) and display_df (for tables)
|
236 |
return analysis_df, display_df, log
|
237 |
|
238 |
|
@@ -245,36 +241,22 @@ def plot_properties_dashboard(df: pd.DataFrame):
|
|
245 |
if df['Drug_Like'].dtype != bool:
|
246 |
return None, f"Cannot plot: 'Drug_Like' column must be boolean, but it is {df['Drug_Like'].dtype}."
|
247 |
|
248 |
-
# Prepare data
|
249 |
df['Category'] = df['Drug_Like'].apply(lambda x: 'Drug-Like' if x else 'Non-Drug-Like')
|
250 |
source = ColumnDataSource(df)
|
251 |
|
252 |
-
|
253 |
-
colors = ['#00D4AA', '#FF6B6B'] # Teal for drug-like, coral for non-drug-like
|
254 |
color_mapper = factor_cmap('Category', palette=colors, factors=["Drug-Like", "Non-Drug-Like"])
|
255 |
|
256 |
-
# Enhanced hover tooltip for scatter plots
|
257 |
scatter_hover = HoverTool(tooltips=[
|
258 |
-
("Compound", "@Molecule"),
|
259 |
-
("
|
260 |
-
("LogP", "@LogP{0.00}"),
|
261 |
-
("HBD", "@HBD"),
|
262 |
-
("HBA", "@HBA"),
|
263 |
-
("TPSA", "@TPSA{0.0} Γ
Β²"),
|
264 |
-
("Category", "@Category")
|
265 |
])
|
266 |
|
267 |
-
# Common plot configuration - responsive plots with a 1:1 aspect ratio
|
268 |
plot_config = {
|
269 |
-
'sizing_mode': 'scale_width',
|
270 |
-
'
|
271 |
-
'
|
272 |
-
'
|
273 |
-
'outline_line_color': '#333333',
|
274 |
-
'min_border_left': 50,
|
275 |
-
'min_border_right': 50,
|
276 |
-
'min_border_top': 50,
|
277 |
-
'min_border_bottom': 50
|
278 |
}
|
279 |
|
280 |
def style_plot(p, x_label, y_label, title):
|
@@ -300,146 +282,61 @@ def plot_properties_dashboard(df: pd.DataFrame):
|
|
300 |
p.legend.background_fill_alpha = 0.8
|
301 |
p.legend.border_line_color = '#444444'
|
302 |
p.legend.label_text_color = '#FFFFFF'
|
303 |
-
p.legend.label_text_font_size = '10pt'
|
304 |
p.legend.click_policy = "mute"
|
305 |
-
p.legend.glyph_height = 15
|
306 |
-
p.legend.spacing = 5
|
307 |
-
|
308 |
return p
|
309 |
|
310 |
-
# Plot 1: MW vs LogP with Lipinski guidelines
|
311 |
p1 = figure(title="Molecular Weight vs LogP", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
|
312 |
p1.scatter('MW', 'LogP', source=source, legend_group='Category',
|
313 |
color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
|
314 |
-
|
315 |
-
#
|
316 |
-
p1.line([500, 500], [df['LogP'].min()-0.5, df['LogP'].max()+0.5],
|
317 |
-
line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="MW β€ 500")
|
318 |
-
p1.line([df['MW'].min()-50, df['MW'].max()+50], [5, 5],
|
319 |
-
line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="LogP β€ 5")
|
320 |
-
|
321 |
style_plot(p1, "Molecular Weight (Da)", "LogP", "Lipinski Rule: MW vs LogP")
|
322 |
|
323 |
-
# Plot 2: HBD vs HBA
|
324 |
p2 = figure(title="Hydrogen Bonding Profile", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
|
325 |
-
p2.scatter('HBD', 'HBA', source=source, legend_group='Category',
|
326 |
-
|
327 |
-
|
328 |
-
# Add Lipinski rule lines
|
329 |
-
p2.line([5, 5], [df['HBA'].min()-1, df['HBA'].max()+1],
|
330 |
-
line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="HBD β€ 5")
|
331 |
-
p2.line([df['HBD'].min()-1, df['HBD'].max()+1], [10, 10],
|
332 |
-
line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="HBA β€ 10")
|
333 |
-
|
334 |
style_plot(p2, "Hydrogen Bond Donors", "Hydrogen Bond Acceptors", "Lipinski Rule: Hydrogen Bonding")
|
335 |
|
336 |
-
# Plot 3: TPSA vs Rotatable Bonds with guidelines
|
337 |
p3 = figure(title="Molecular Flexibility & Polarity", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
|
338 |
-
p3.scatter('TPSA', 'RotBonds', source=source, legend_group='Category',
|
339 |
-
|
340 |
-
|
341 |
-
# Add permeability guideline lines
|
342 |
-
p3.line([140, 140], [df['RotBonds'].min()-1, df['RotBonds'].max()+1],
|
343 |
-
line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="TPSA β€ 140")
|
344 |
-
p3.line([df['TPSA'].min()-10, df['TPSA'].max()+10], [10, 10],
|
345 |
-
line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="RotBonds β€ 10")
|
346 |
-
|
347 |
style_plot(p3, "Topological Polar Surface Area (Γ
Β²)", "Rotatable Bonds", "Drug Permeability Indicators")
|
348 |
-
|
349 |
-
# Plot 4: Enhanced Donut Chart
|
350 |
-
# --- MODIFICATION ---
|
351 |
-
# Configure donut plot separately as it doesn't need all scatter tools
|
352 |
p4_config = plot_config.copy()
|
353 |
-
p4_config['tools'] = "hover"
|
354 |
p4_config.update({'x_range': (-1.0, 1.0), 'y_range': (-1.0, 1.0)})
|
355 |
p4 = figure(title="Drug-Likeness Distribution", **p4_config)
|
356 |
-
|
357 |
-
# Calculate percentages and create donut chart
|
358 |
counts = df['Category'].value_counts()
|
359 |
-
|
360 |
-
data =
|
361 |
-
|
362 |
-
'value': counts.values,
|
363 |
-
'percentage': (counts.values / total * 100), # Keep full precision for hover
|
364 |
-
'angle': counts.values / total * 2 * pi,
|
365 |
-
'color': [colors[0] if cat == 'Drug-Like' else colors[1] for cat in counts.index]
|
366 |
-
})
|
367 |
-
|
368 |
-
# Calculate start and end angles for each wedge
|
369 |
-
data['start_angle'] = 0
|
370 |
-
data['end_angle'] = 0
|
371 |
-
cumulative_angle = 0
|
372 |
-
for i in range(len(data)):
|
373 |
-
data.iloc[i, data.columns.get_loc('start_angle')] = cumulative_angle
|
374 |
-
cumulative_angle += data.iloc[i]['angle']
|
375 |
-
data.iloc[i, data.columns.get_loc('end_angle')] = cumulative_angle
|
376 |
-
|
377 |
-
donut_source = ColumnDataSource(data)
|
378 |
-
|
379 |
-
# Create donut using annular wedges (outer ring) - sized to fit within boundaries
|
380 |
wedge_renderer = p4.annular_wedge(x=0, y=0, inner_radius=0.25, outer_radius=0.45,
|
381 |
-
start_angle='
|
382 |
line_color="white", line_width=3, fill_color='color',
|
383 |
-
legend_field='category', source=
|
384 |
-
|
385 |
-
|
386 |
-
for i, row in data.iterrows():
|
387 |
-
# Calculate middle angle for text positioning
|
388 |
-
mid_angle = (row['start_angle'] + row['end_angle']) / 2
|
389 |
-
# Position text at middle radius of the annular wedge
|
390 |
-
text_radius = 0.35
|
391 |
-
x_pos = text_radius * cos(mid_angle)
|
392 |
-
y_pos = text_radius * sin(mid_angle)
|
393 |
-
|
394 |
-
p4.text([x_pos], [y_pos], text=[f"{row['percentage']:.1f}%"],
|
395 |
-
text_align="center", text_baseline="middle",
|
396 |
-
text_color="white", text_font_size="11pt", text_font_style="bold")
|
397 |
-
|
398 |
-
# Add center text
|
399 |
-
p4.text([0], [0], text=[f"{len(df)}\nCompounds"],
|
400 |
-
text_align="center", text_baseline="middle",
|
401 |
-
text_color="white", text_font_size="14pt", text_font_style="bold")
|
402 |
-
|
403 |
-
# --- MODIFICATION ---
|
404 |
-
# Custom hover for donut with detailed info
|
405 |
-
donut_hover = HoverTool(
|
406 |
-
tooltips=[
|
407 |
-
("Category", "@category"),
|
408 |
-
("Count", "@value"),
|
409 |
-
("Percentage", "@percentage{0.1f}%")
|
410 |
-
],
|
411 |
-
renderers=[wedge_renderer] # Attach hover tool specifically to the wedge glyph
|
412 |
-
)
|
413 |
p4.add_tools(donut_hover)
|
414 |
-
|
415 |
style_plot(p4, "", "", "Compound Classification")
|
416 |
p4.axis.visible = False
|
417 |
p4.grid.visible = False
|
418 |
|
419 |
-
|
420 |
-
grid = gridplot([[p1, p2], [p3, p4]], sizing_mode='scale_width',
|
421 |
-
toolbar_location='right', merge_tools=True)
|
422 |
-
|
423 |
return grid, "β
Generated enhanced molecular properties dashboard."
|
424 |
|
425 |
# ===== Phase 2 Functions =====
|
426 |
def get_phase2_molecules():
|
427 |
"""Returns an expanded list of common drugs with corrected SMILES."""
|
428 |
return {
|
429 |
-
'Paracetamol': 'CC(=O)Nc1ccc(O)cc1',
|
430 |
-
'
|
431 |
-
'
|
432 |
-
'
|
433 |
-
'
|
434 |
-
'
|
435 |
-
'
|
436 |
-
'Morphine': 'C[N@]1CC[C@]23c4c5ccc(O)c4O[C@H]2[C@@H](O)C=C[C@H]3[C@H]1C5',
|
437 |
-
'Cetirizine': 'O=C(O)COCCOc1ccc(cc1)C(c1ccccc1)N1CCN(CC1)CCO',
|
438 |
-
'Fluoxetine': 'CNCCC(c1ccccc1)Oc1ccc(C(F)(F)F)cc1',
|
439 |
-
'Amoxicillin': 'C[C@@]1([C@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](N)c3ccc(O)cc3)C(=O)O)C',
|
440 |
-
'Atorvastatin': 'CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1',
|
441 |
-
'Ciprofloxacin': 'O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc12',
|
442 |
-
'Warfarin': 'O=C(c1ccccc1)C(c1oc2ccccc2c1=O)C',
|
443 |
'Furosemide': 'O=C(O)c1cc(Cl)c(NC2CO2)c(c1)S(=O)(=O)N',
|
444 |
}
|
445 |
|
@@ -468,8 +365,6 @@ def predict_admet_properties(smiles_dict: dict):
|
|
468 |
log += f"β
Predicted ADMET properties for {len(df)} molecules.\n"
|
469 |
return df, log
|
470 |
|
471 |
-
# --- MODIFIED FUNCTION ---
|
472 |
-
# This is the updated function to correctly render 2D molecules on a dark background.
|
473 |
def visualize_molecule_2d_3d(smiles: str, name: str):
|
474 |
"""Generates a side-by-side 2D SVG and 3D py3Dmol HTML view for a single molecule."""
|
475 |
log = ""
|
@@ -575,27 +470,41 @@ def visualize_molecule_2d_3d(smiles: str, name: str):
|
|
575 |
return combined_html, log
|
576 |
except Exception as e:
|
577 |
return f"<p>Error visualizing {name}: {e}</p>", f"β Error visualizing {name}: {e}"
|
578 |
-
|
579 |
-
def visualize_protein_ligand_interaction(pdb_data: str, pdb_id: str, ligand_resn
|
580 |
-
"""
|
581 |
-
|
|
|
|
|
|
|
|
|
582 |
try:
|
583 |
-
viewer = py3Dmol.view(width='100%', height=
|
584 |
viewer.setBackgroundColor('#1C1C1C')
|
|
|
|
|
585 |
viewer.addModel(pdb_data, "pdb")
|
586 |
-
|
587 |
-
|
588 |
-
viewer.
|
589 |
-
|
590 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
591 |
html = viewer._make_html()
|
592 |
-
log =
|
593 |
-
f"π’ Green: Ligand ({ligand_resn})\n"
|
594 |
-
f"π Orange: Residues within 4Γ
of ligand\n")
|
595 |
return html, log
|
|
|
596 |
except Exception as e:
|
597 |
-
return None, f"β
|
598 |
-
|
599 |
# ===== Phase 3 Functions =====
|
600 |
def get_phase3_molecules():
|
601 |
return {
|
@@ -654,368 +563,358 @@ def get_regulatory_summary():
|
|
654 |
return pd.DataFrame(summary), "β
Generated AI/ML documentation summary."
|
655 |
|
656 |
def simulate_rwd_analysis(adverse_event_text):
|
|
|
|
|
|
|
657 |
np.random.seed(42)
|
658 |
-
base_events = list(np.random.choice(
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
|
|
|
|
|
663 |
|
664 |
-
|
665 |
-
fig_bar, ax_bar = plt.subplots(figsize=(10, 6))
|
666 |
|
667 |
-
|
668 |
-
|
669 |
|
670 |
-
|
671 |
-
|
672 |
-
ax_bar.set_xlabel('Number of Reports')
|
673 |
-
ax_bar.set_ylabel('Adverse Event')
|
674 |
|
675 |
-
|
676 |
|
677 |
-
|
|
|
|
|
678 |
|
679 |
-
|
680 |
-
framework = {'Pillar': ['1. Beneficence & Non-Maleficence', '2. Justice & Fairness', '3. Transparency & Explainability', '4. Accountability & Governance'],
|
681 |
-
'Description': ['AI should help patients and do no harm. Requires rigorous validation and safety monitoring.',
|
682 |
-
'AI must not create or worsen health disparities. Requires bias detection and mitigation.',
|
683 |
-
'Clinical decisions influenced by AI must be understandable. Requires interpretable models.',
|
684 |
-
'Clear lines of responsibility for AI systems must be established. Requires human oversight.']}
|
685 |
-
return pd.DataFrame(framework), "β
Generated ethical framework summary."
|
686 |
|
|
|
|
|
|
|
|
|
|
|
687 |
|
688 |
-
|
|
|
|
|
|
|
689 |
|
690 |
-
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
|
695 |
-
|
696 |
-
|
697 |
-
|
698 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
699 |
if 'results_p1' not in st.session_state: st.session_state.results_p1 = {}
|
700 |
-
if 'log_p2' not in st.session_state: st.session_state.log_p2 = "Phase 2 logs will appear here."
|
701 |
if 'results_p2' not in st.session_state: st.session_state.results_p2 = {}
|
702 |
-
if 'log_p3' not in st.session_state: st.session_state.log_p3 = "Phase 3 logs will appear here."
|
703 |
if 'results_p3' not in st.session_state: st.session_state.results_p3 = {}
|
704 |
-
if 'log_p4' not in st.session_state: st.session_state.log_p4 = "Phase 4 logs will appear here."
|
705 |
if 'results_p4' not in st.session_state: st.session_state.results_p4 = {}
|
706 |
|
|
|
|
|
|
|
|
|
|
|
707 |
tab1, tab2, tab3, tab4 = st.tabs([
|
708 |
-
"Phase 1
|
709 |
-
"Phase 2
|
710 |
-
"Phase 3
|
711 |
-
"Phase 4
|
712 |
])
|
713 |
|
714 |
-
#
|
715 |
with tab1:
|
716 |
-
st.header("
|
717 |
-
st.markdown("
|
718 |
-
|
719 |
-
|
720 |
-
|
721 |
-
|
722 |
-
|
723 |
-
|
724 |
-
|
725 |
-
|
726 |
-
|
727 |
-
|
728 |
-
|
729 |
-
|
730 |
-
|
731 |
-
|
732 |
-
|
733 |
-
|
734 |
-
|
735 |
-
|
736 |
-
|
737 |
-
|
738 |
-
|
739 |
-
|
740 |
-
|
741 |
-
|
742 |
-
|
743 |
-
|
744 |
-
for line in molecules_input_p1.strip().split('\n'):
|
745 |
-
cleaned_line = line.replace('\xa0', ' ').strip()
|
746 |
-
if ':' in cleaned_line:
|
747 |
-
name, smiles = cleaned_line.split(':', 1)
|
748 |
-
smiles_dict_p1[name.strip()] = smiles.strip()
|
749 |
-
if smiles_dict_p1:
|
750 |
-
full_log += f"β
Successfully parsed {len(smiles_dict_p1)} molecules from input.\n"
|
751 |
-
else:
|
752 |
-
full_log += "β οΈ Could not parse any molecules. Please check the format (e.g., 'Aspirin:CC...').\n"
|
753 |
-
except Exception as e:
|
754 |
-
full_log += f"β Error parsing molecules list: {e}\n"
|
755 |
-
smiles_dict_p1 = {}
|
756 |
-
else:
|
757 |
-
full_log += "β οΈ Molecule input is empty. No analysis to perform.\n"
|
758 |
-
|
759 |
-
if smiles_dict_p1:
|
760 |
-
pdb_data, log_pdb_fetch = fetch_pdb_structure(pdb_id_input)
|
761 |
-
full_log += log_pdb_fetch
|
762 |
-
fasta_log = fetch_fasta_sequence(protein_id_input)
|
763 |
-
full_log += fasta_log
|
764 |
-
protein_view_html, log_3d_viz = visualize_protein_3d(pdb_data, pdb_id_input)
|
765 |
-
full_log += log_3d_viz
|
766 |
|
767 |
-
|
|
|
768 |
full_log += log_props
|
769 |
|
770 |
-
analysis_df, display_df,
|
771 |
-
full_log +=
|
772 |
|
773 |
-
|
774 |
-
full_log +=
|
|
|
|
|
|
|
|
|
|
|
|
|
775 |
|
776 |
-
lipinski_cols = ['Molecule', 'MW', 'LogP', 'HBD', 'HBA', 'Lipinski_Violations', 'Drug_Like']
|
777 |
-
lipinski_subset_df = display_df[lipinski_cols] if not display_df.empty else pd.DataFrame(columns=lipinski_cols)
|
778 |
-
|
779 |
st.session_state.results_p1 = {
|
780 |
-
'
|
781 |
-
'
|
782 |
-
'
|
783 |
-
'
|
784 |
-
'props_plot': props_plot
|
785 |
}
|
786 |
-
else:
|
787 |
-
st.session_state.results_p1 = {}
|
788 |
-
|
789 |
-
full_log += "\n--- Phase 1 Analysis Complete ---"
|
790 |
-
st.session_state.log_p1 = full_log
|
791 |
|
792 |
st.text_area("Status & Logs", st.session_state.log_p1, height=200, key="log_p1_area")
|
793 |
-
|
794 |
-
|
|
|
|
|
|
|
795 |
res1 = st.session_state.results_p1
|
796 |
-
p1_tabs = st.tabs(["
|
|
|
797 |
with p1_tabs[0]:
|
798 |
-
st.subheader("
|
799 |
-
if res1.get('
|
800 |
-
|
801 |
-
st.bokeh_chart(res1['props_plot'], use_container_width=True)
|
802 |
else:
|
803 |
-
st.warning("Could not
|
|
|
804 |
with p1_tabs[1]:
|
805 |
-
st.subheader("
|
806 |
-
|
807 |
-
|
808 |
-
st.dataframe(res1.get('
|
809 |
-
|
810 |
-
|
811 |
-
|
812 |
-
st.components.v1.html(res1['protein_view_html'], height=600, scrolling=False)
|
813 |
-
st.subheader("FASTA Sequence Information")
|
814 |
-
st.text_area("", res1.get('fasta_log', 'No data'), height=200, key="fasta_info_area")
|
815 |
-
|
816 |
-
# ===== TAB 2: LEAD GENERATION & OPTIMIZATION =====
|
817 |
with tab2:
|
818 |
-
st.header("
|
819 |
-
st.markdown("
|
820 |
-
|
821 |
-
|
822 |
-
|
823 |
-
|
824 |
-
|
825 |
-
|
826 |
-
|
827 |
-
|
828 |
-
|
829 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
830 |
|
831 |
-
|
832 |
-
|
833 |
-
|
834 |
-
height=250,
|
835 |
-
key="p2_molecules"
|
836 |
-
)
|
837 |
-
|
838 |
-
run_phase2_btn = st.form_submit_button("π Run Phase 2 Analysis", use_container_width=True)
|
839 |
-
|
840 |
-
if run_phase2_btn:
|
841 |
-
full_log = "--- Starting Phase 2 Analysis ---\n"
|
842 |
-
|
843 |
-
smiles_dict = {}
|
844 |
-
if molecules_input.strip():
|
845 |
-
try:
|
846 |
-
for line in molecules_input.strip().split('\n'):
|
847 |
-
cleaned_line = line.replace('\xa0', ' ').strip()
|
848 |
-
if ':' in cleaned_line:
|
849 |
-
name, smiles = cleaned_line.split(':', 1)
|
850 |
-
smiles_dict[name.strip()] = smiles.strip()
|
851 |
-
if smiles_dict:
|
852 |
-
full_log += f"β
Successfully parsed {len(smiles_dict)} molecules from input.\n"
|
853 |
-
else:
|
854 |
-
full_log += "β οΈ Could not parse any molecules. Please check the format (e.g., 'Aspirin:CC(=O)OC1=CC=CC=C1C(=O)O').\n"
|
855 |
-
except Exception as e:
|
856 |
-
full_log += f"β Error parsing molecules list: {e}\n"
|
857 |
-
smiles_dict = {}
|
858 |
-
else:
|
859 |
-
full_log += "β οΈ Molecule input is empty. No analysis to perform.\n"
|
860 |
-
|
861 |
-
if smiles_dict:
|
862 |
-
screening_df, log_screening = simulate_virtual_screening(smiles_dict)
|
863 |
-
full_log += log_screening
|
864 |
-
admet_df, log_admet = predict_admet_properties(smiles_dict)
|
865 |
full_log += log_admet
|
866 |
|
867 |
-
|
868 |
-
|
869 |
-
|
870 |
-
|
871 |
-
|
872 |
-
|
873 |
-
full_log +=
|
874 |
-
|
875 |
-
pdb_data, log_pdb_fetch_2 = fetch_pdb_structure(phase2_pdb_id_input)
|
876 |
-
full_log += log_pdb_fetch_2
|
877 |
-
interaction_html, log_interaction = visualize_protein_ligand_interaction(pdb_data, phase2_pdb_id_input, phase2_ligand_resn)
|
878 |
-
full_log += log_interaction
|
879 |
|
|
|
|
|
880 |
st.session_state.results_p2 = {
|
881 |
-
'
|
882 |
-
'
|
883 |
-
'combined_viz_html': combined_viz_html,
|
884 |
-
'interaction_html': interaction_html,
|
885 |
-
'molecules_used': smiles_dict
|
886 |
}
|
887 |
-
|
888 |
-
st.session_state.results_p2 = {}
|
889 |
-
|
890 |
-
full_log += "\n--- Phase 2 Analysis Complete ---"
|
891 |
-
st.session_state.log_p2 = full_log
|
892 |
-
|
893 |
st.text_area("Status & Logs", st.session_state.log_p2, height=200, key="log_p2_area")
|
894 |
|
895 |
-
|
|
|
|
|
|
|
896 |
res2 = st.session_state.results_p2
|
897 |
-
p2_tabs = st.tabs(["
|
|
|
898 |
with p2_tabs[0]:
|
899 |
-
|
900 |
-
|
901 |
-
|
902 |
-
st.dataframe(res2.get('screening_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
903 |
-
with col2:
|
904 |
-
st.subheader("ADMET Properties Prediction")
|
905 |
-
st.dataframe(res2.get('admet_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
906 |
with p2_tabs[1]:
|
907 |
-
|
908 |
-
if
|
909 |
-
st.
|
910 |
-
st.info(f"Currently visualizing: {', '.join(molecules_used.keys())}")
|
911 |
else:
|
912 |
-
st.
|
913 |
-
|
914 |
-
if res2.get('combined_viz_html'):
|
915 |
-
st.components.v1.html(res2.get('combined_viz_html'), height=len(molecules_used) * 400 + 100, scrolling=True)
|
916 |
-
else:
|
917 |
-
st.warning("No molecule visualizations available. Please run the analysis first.")
|
918 |
-
with p2_tabs[2]:
|
919 |
-
st.subheader("Detailed view of the top candidate binding to the protein.")
|
920 |
-
if res2.get('interaction_html'):
|
921 |
-
st.components.v1.html(res2.get('interaction_html'), height=700, scrolling=False)
|
922 |
-
else:
|
923 |
-
st.warning("No protein-ligand interaction view available. Please run the analysis first.")
|
924 |
|
925 |
-
#
|
926 |
with tab3:
|
927 |
-
st.header("
|
928 |
-
st.markdown("
|
929 |
-
|
930 |
-
|
931 |
-
|
932 |
-
|
933 |
-
|
934 |
-
|
935 |
-
|
936 |
-
|
937 |
-
|
938 |
-
|
939 |
-
|
940 |
-
|
941 |
-
|
942 |
-
|
943 |
-
|
944 |
-
|
945 |
-
|
946 |
-
|
947 |
-
|
948 |
-
|
949 |
-
|
950 |
-
|
951 |
-
|
952 |
-
|
953 |
-
|
954 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
955 |
|
956 |
st.text_area("Status & Logs", st.session_state.log_p3, height=200, key="log_p3_area")
|
957 |
|
958 |
-
|
|
|
|
|
|
|
959 |
res3 = st.session_state.results_p3
|
960 |
-
|
961 |
-
|
962 |
-
st.subheader("Comprehensive Molecular Properties & AI-Powered Toxicity Prediction (Simulated)")
|
963 |
-
col1, col2 = st.columns(2)
|
964 |
-
with col1:
|
965 |
-
st.dataframe(res3.get('comp_props_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
966 |
-
with col2:
|
967 |
-
st.dataframe(res3.get('tox_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
968 |
-
with p3_tabs[1]:
|
969 |
-
st.subheader("Interactive 3D gallery of the compounds under analysis.")
|
970 |
-
if res3.get('combined_viz_html'):
|
971 |
-
st.components.v1.html(res3.get('combined_viz_html'), height=1000, scrolling=True)
|
972 |
-
|
973 |
-
|
974 |
-
# ===== TAB 4: POST-MARKET SURVEILLANCE =====
|
975 |
-
with tab4:
|
976 |
-
st.header("π Step 4: Regulatory Submission and Pharmacovigilance")
|
977 |
-
st.markdown("Explore summaries of the documentation needed for regulatory approval and simulate how AI can monitor real-world data for adverse events.")
|
978 |
-
|
979 |
-
with st.form(key="phase4_form"):
|
980 |
-
st.subheader("Analysis Controls")
|
981 |
-
rwd_input = st.text_area("Enter new adverse events (comma-separated)", value="severe allergic reaction, joint pain, severe allergic reaction", height=100, key="p4_rwd")
|
982 |
-
run_phase4_btn = st.form_submit_button("π Run Phase 4 Analysis", use_container_width=True)
|
983 |
-
|
984 |
-
if run_phase4_btn:
|
985 |
-
full_log = "--- Starting Phase 4 Analysis ---\n"
|
986 |
-
reg_df, log_reg = get_regulatory_summary()
|
987 |
-
full_log += log_reg
|
988 |
-
eth_df, log_eth = get_ethical_framework()
|
989 |
-
full_log += log_eth
|
990 |
-
|
991 |
-
rwd_df, plot_bar, log_rwd = simulate_rwd_analysis(rwd_input)
|
992 |
-
full_log += log_rwd
|
993 |
-
full_log += "\n--- Phase 4 Analysis Complete ---"
|
994 |
-
st.session_state.log_p4 = full_log
|
995 |
|
996 |
-
st.
|
997 |
-
|
998 |
-
|
999 |
-
|
1000 |
-
|
1001 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1002 |
|
1003 |
st.text_area("Status & Logs", st.session_state.log_p4, height=200, key="log_p4_area")
|
1004 |
|
1005 |
-
|
|
|
|
|
|
|
1006 |
res4 = st.session_state.results_p4
|
1007 |
p4_tabs = st.tabs(["Pharmacovigilance Analysis", "Regulatory & Ethical Frameworks"])
|
|
|
1008 |
with p4_tabs[0]:
|
1009 |
st.subheader("Simulated Adverse Event Analysis")
|
1010 |
if res4.get('plot_bar'):
|
1011 |
-
st.
|
1012 |
st.dataframe(res4.get('rwd_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
1013 |
|
1014 |
with p4_tabs[1]:
|
1015 |
-
|
1016 |
-
|
1017 |
-
|
1018 |
-
|
1019 |
-
|
1020 |
-
st.subheader("Ethical Framework for AI in Healthcare")
|
1021 |
-
st.dataframe(res4.get('eth_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
|
|
|
|
1 |
# AI-Powered Drug Discovery Pipeline Streamlit Application
|
2 |
# This script integrates four phases of drug discovery into a single, interactive web app.
|
|
|
3 |
import streamlit as st
|
4 |
import pandas as pd
|
5 |
import numpy as np
|
|
|
|
|
6 |
import requests
|
7 |
import io
|
8 |
import re
|
|
|
37 |
page_title="AI Drug Discovery Pipeline",
|
38 |
page_icon="π¬",
|
39 |
layout="wide",
|
40 |
+
initial_sidebar_state="collapsed",
|
41 |
)
|
42 |
|
43 |
+
# Custom CSS for a professional, dark theme
|
44 |
def apply_custom_styling():
|
45 |
st.markdown(
|
46 |
"""
|
47 |
<style>
|
48 |
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
|
|
|
49 |
html, body, [class*="st-"] {
|
50 |
font-family: 'Roboto', sans-serif;
|
51 |
}
|
|
|
87 |
color: #FFF;
|
88 |
background-color: #00A0FF;
|
89 |
}
|
90 |
+
|
91 |
+
/* Ensure headers are white */
|
92 |
+
h1, h2, h3, h4, h5, h6 {
|
93 |
+
color: white !important;
|
94 |
+
}
|
95 |
</style>
|
96 |
""",
|
97 |
unsafe_allow_html=True
|
|
|
189 |
mol = Chem.MolFromSmiles(smiles)
|
190 |
if mol:
|
191 |
props = {
|
192 |
+
'Molecule': name,
|
193 |
'SMILES': smiles,
|
194 |
'MW': Descriptors.MolWt(mol),
|
195 |
'LogP': Descriptors.MolLogP(mol),
|
|
|
214 |
if df.empty:
|
215 |
return pd.DataFrame(), pd.DataFrame(), "Cannot assess drug-likeness: No properties data."
|
216 |
|
|
|
217 |
analysis_df = df.copy()
|
218 |
analysis_df['MW_OK'] = analysis_df['MW'] <= 500
|
219 |
analysis_df['LogP_OK'] = analysis_df['LogP'] <= 5
|
|
|
221 |
analysis_df['HBA_OK'] = analysis_df['HBA'] <= 10
|
222 |
analysis_df['Lipinski_Violations'] = (~analysis_df[['MW_OK', 'LogP_OK', 'HBD_OK', 'HBA_OK']]).sum(axis=1)
|
223 |
|
|
|
224 |
analysis_df['Drug_Like'] = analysis_df['Lipinski_Violations'] <= 1
|
225 |
|
|
|
226 |
display_df = df.copy()
|
227 |
display_df['Lipinski_Violations'] = analysis_df['Lipinski_Violations']
|
228 |
display_df['Drug_Like'] = analysis_df['Drug_Like'].apply(lambda x: 'β
Yes' if x else 'β No')
|
229 |
|
230 |
log = "β
Assessed drug-likeness using Lipinski's Rule of Five.\n"
|
231 |
|
|
|
232 |
return analysis_df, display_df, log
|
233 |
|
234 |
|
|
|
241 |
if df['Drug_Like'].dtype != bool:
|
242 |
return None, f"Cannot plot: 'Drug_Like' column must be boolean, but it is {df['Drug_Like'].dtype}."
|
243 |
|
|
|
244 |
df['Category'] = df['Drug_Like'].apply(lambda x: 'Drug-Like' if x else 'Non-Drug-Like')
|
245 |
source = ColumnDataSource(df)
|
246 |
|
247 |
+
colors = ['#00D4AA', '#FF6B6B']
|
|
|
248 |
color_mapper = factor_cmap('Category', palette=colors, factors=["Drug-Like", "Non-Drug-Like"])
|
249 |
|
|
|
250 |
scatter_hover = HoverTool(tooltips=[
|
251 |
+
("Compound", "@Molecule"), ("MW", "@MW{0.0} Da"), ("LogP", "@LogP{0.00}"),
|
252 |
+
("HBD", "@HBD"), ("HBA", "@HBA"), ("TPSA", "@TPSA{0.0} Γ
Β²"), ("Category", "@Category")
|
|
|
|
|
|
|
|
|
|
|
253 |
])
|
254 |
|
|
|
255 |
plot_config = {
|
256 |
+
'sizing_mode': 'scale_width', 'aspect_ratio': 1,
|
257 |
+
'background_fill_color': None, 'border_fill_color': None,
|
258 |
+
'outline_line_color': '#333333', 'min_border_left': 50,
|
259 |
+
'min_border_right': 50, 'min_border_top': 50, 'min_border_bottom': 50
|
|
|
|
|
|
|
|
|
|
|
260 |
}
|
261 |
|
262 |
def style_plot(p, x_label, y_label, title):
|
|
|
282 |
p.legend.background_fill_alpha = 0.8
|
283 |
p.legend.border_line_color = '#444444'
|
284 |
p.legend.label_text_color = '#FFFFFF'
|
|
|
285 |
p.legend.click_policy = "mute"
|
|
|
|
|
|
|
286 |
return p
|
287 |
|
|
|
288 |
p1 = figure(title="Molecular Weight vs LogP", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
|
289 |
p1.scatter('MW', 'LogP', source=source, legend_group='Category',
|
290 |
color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
|
291 |
+
p1.line([500, 500], [df['LogP'].min()-0.5, df['LogP'].max()+0.5], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="MW β€ 500")
|
292 |
+
p1.line([df['MW'].min()-50, df['MW'].max()+50], [5, 5], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="LogP β€ 5")
|
|
|
|
|
|
|
|
|
|
|
293 |
style_plot(p1, "Molecular Weight (Da)", "LogP", "Lipinski Rule: MW vs LogP")
|
294 |
|
|
|
295 |
p2 = figure(title="Hydrogen Bonding Profile", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
|
296 |
+
p2.scatter('HBD', 'HBA', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
|
297 |
+
p2.line([5, 5], [df['HBA'].min()-1, df['HBA'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="HBD β€ 5")
|
298 |
+
p2.line([df['HBD'].min()-1, df['HBD'].max()+1], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="HBA β€ 10")
|
|
|
|
|
|
|
|
|
|
|
|
|
299 |
style_plot(p2, "Hydrogen Bond Donors", "Hydrogen Bond Acceptors", "Lipinski Rule: Hydrogen Bonding")
|
300 |
|
|
|
301 |
p3 = figure(title="Molecular Flexibility & Polarity", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
|
302 |
+
p3.scatter('TPSA', 'RotBonds', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
|
303 |
+
p3.line([140, 140], [df['RotBonds'].min()-1, df['RotBonds'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="TPSA β€ 140")
|
304 |
+
p3.line([df['TPSA'].min()-10, df['TPSA'].max()+10], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="RotBonds β€ 10")
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
style_plot(p3, "Topological Polar Surface Area (Γ
Β²)", "Rotatable Bonds", "Drug Permeability Indicators")
|
306 |
+
|
|
|
|
|
|
|
307 |
p4_config = plot_config.copy()
|
308 |
+
p4_config['tools'] = "hover"
|
309 |
p4_config.update({'x_range': (-1.0, 1.0), 'y_range': (-1.0, 1.0)})
|
310 |
p4 = figure(title="Drug-Likeness Distribution", **p4_config)
|
|
|
|
|
311 |
counts = df['Category'].value_counts()
|
312 |
+
data = pd.DataFrame({'category': counts.index, 'value': counts.values})
|
313 |
+
data['angle'] = data['value']/data['value'].sum() * 2*pi
|
314 |
+
data['color'] = [colors[0] if cat == 'Drug-Like' else colors[1] for cat in counts.index]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
wedge_renderer = p4.annular_wedge(x=0, y=0, inner_radius=0.25, outer_radius=0.45,
|
316 |
+
start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
|
317 |
line_color="white", line_width=3, fill_color='color',
|
318 |
+
legend_field='category', source=data)
|
319 |
+
p4.text([0], [0], text=[f"{len(df)}\nCompounds"], text_align="center", text_baseline="middle", text_color="white", text_font_size="14pt", text_font_style="bold")
|
320 |
+
donut_hover = HoverTool(tooltips=[("Category", "@category"), ("Count", "@value")], renderers=[wedge_renderer])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
321 |
p4.add_tools(donut_hover)
|
|
|
322 |
style_plot(p4, "", "", "Compound Classification")
|
323 |
p4.axis.visible = False
|
324 |
p4.grid.visible = False
|
325 |
|
326 |
+
grid = gridplot([[p1, p2], [p3, p4]], sizing_mode='scale_width', toolbar_location='right', merge_tools=True)
|
|
|
|
|
|
|
327 |
return grid, "β
Generated enhanced molecular properties dashboard."
|
328 |
|
329 |
# ===== Phase 2 Functions =====
|
330 |
def get_phase2_molecules():
    """Build the Phase 2 screening library of common drugs.

    Returns:
        dict: Drug name mapped to its SMILES string, in the order listed below.
    """
    # NOTE(review): some entries (e.g. Furosemide, Warfarin, Atorvastatin)
    # look simplified compared with the reference structures of those drugs;
    # confirm before relying on properties computed from them.
    library = (
        ('Paracetamol', 'CC(=O)Nc1ccc(O)cc1'),
        ('Ibuprofen', 'CC(C)Cc1ccc(C(C)C(=O)O)cc1'),
        ('Aspirin', 'CC(=O)Oc1ccccc1C(=O)O'),
        ('Naproxen', 'C[C@H](C(=O)O)c1ccc2cc(OC)ccc2c1'),
        ('Diazepam', 'CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc12'),
        ('Metformin', 'CN(C)C(=N)N=C(N)N'),
        ('Loratadine', 'CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1'),
        ('Morphine', 'C[N@]1CC[C@]23c4c5ccc(O)c4O[C@H]2[C@@H](O)C=C[C@H]3[C@H]1C5'),
        ('Cetirizine', 'O=C(O)COCCOc1ccc(cc1)C(c1ccccc1)N1CCN(CC1)CCO'),
        ('Fluoxetine', 'CNCCC(c1ccccc1)Oc1ccc(C(F)(F)F)cc1'),
        ('Amoxicillin', 'C[C@@]1([C@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](N)c3ccc(O)cc3)C(=O)O)C'),
        ('Atorvastatin', 'CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1'),
        ('Ciprofloxacin', 'O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc12'),
        ('Warfarin', 'O=C(c1ccccc1)C(c1oc2ccccc2c1=O)C'),
        ('Furosemide', 'O=C(O)c1cc(Cl)c(NC2CO2)c(c1)S(=O)(=O)N'),
    )
    return dict(library)
|
342 |
|
|
|
365 |
log += f"β
Predicted ADMET properties for {len(df)} molecules.\n"
|
366 |
return df, log
|
367 |
|
|
|
|
|
368 |
def visualize_molecule_2d_3d(smiles: str, name: str):
|
369 |
"""Generates a side-by-side 2D SVG and 3D py3Dmol HTML view for a single molecule."""
|
370 |
log = ""
|
|
|
470 |
return combined_html, log
|
471 |
except Exception as e:
|
472 |
return f"<p>Error visualizing {name}: {e}</p>", f"β Error visualizing {name}: {e}"
|
473 |
+
|
474 |
+
def visualize_protein_ligand_interaction(pdb_data: str, pdb_id: str, ligand_resn: str):
    """
    Generates a protein-ligand interaction visualization using py3Dmol.

    Args:
        pdb_data: PDB-format text of the structure to render.
        pdb_id: Identifier used only in the returned log message.
        ligand_resn: Residue name of the ligand to highlight. Surrounding
            whitespace is ignored; ``None`` or an empty value renders the
            protein without any ligand highlighting.

    Returns:
        tuple: ``(html, log)`` where ``html`` is the viewer markup, or
        ``None`` when there is no PDB data or rendering failed, and ``log``
        is a human-readable status message.
    """
    if not pdb_data:
        return None, "Cannot generate interaction view: No PDB data provided."

    # Tolerate None and stray whitespace coming from a free-text input box.
    ligand_resn = (ligand_resn or "").strip()

    try:
        viewer = py3Dmol.view(width='100%', height=650)
        viewer.setBackgroundColor('#1C1C1C')

        # Add the protein structure
        viewer.addModel(pdb_data, "pdb")

        # Style the protein (cartoon representation)
        viewer.setStyle({'cartoon': {'color': 'lightblue', 'opacity': 0.8}})

        # Highlight the ligand if specified
        if ligand_resn:
            ligand_selection = {'resn': ligand_resn}
            viewer.addStyle(ligand_selection, {'stick': {'colorscheme': 'greenCarbon', 'radius': 0.2}})
            viewer.addStyle(ligand_selection, {'sphere': {'scale': 0.3, 'colorscheme': 'greenCarbon'}})

            # Add surface representation for binding site
            viewer.addSurface(py3Dmol.VDW, {'opacity': 0.2, 'color': 'white'}, ligand_selection)

        # Zoom on the ligand when one was given, otherwise on the whole model.
        viewer.zoomTo({'resn': ligand_resn} if ligand_resn else {})

        html = viewer._make_html()
        if ligand_resn:
            log = f"✅ Generated protein-ligand interaction view for {pdb_id} with ligand {ligand_resn}."
        else:
            log = f"✅ Generated protein view for {pdb_id} (no ligand highlighted)."
        return html, log

    except Exception as e:
        # UI boundary: report the failure through the log instead of crashing the app.
        return None, f"❌ Interaction visualization error: {e}"
|
507 |
+
|
508 |
# ===== Phase 3 Functions =====
|
509 |
def get_phase3_molecules():
|
510 |
return {
|
|
|
563 |
return pd.DataFrame(summary), "β
Generated AI/ML documentation summary."
|
564 |
|
565 |
def simulate_rwd_analysis(adverse_event_text):
    """
    Analyzes simulated adverse event text and generates a DataFrame and Bokeh plot.

    A fixed-seed sample of 100 baseline events is combined with every
    alphabetic word of three or more letters found in ``adverse_event_text``
    (lower-cased). The ten most frequent terms are tabulated and plotted.

    Args:
        adverse_event_text: Free-text adverse event report. ``None`` or an
            empty string contributes no terms, leaving only the baseline.

    Returns:
        tuple: ``(results_df, p, log)`` where ``results_df`` has the columns
        ``Adverse_Event`` and ``Frequency``, ``p`` is the Bokeh horizontal
        bar chart, and ``log`` is a status message ending with a newline.
    """
    # Local seeded generator: gives the same draws as np.random.seed(42)
    # followed by np.random.choice, without resetting the global NumPy RNG.
    rng = np.random.RandomState(42)
    base_events = rng.choice(
        ['headache', 'nausea', 'fatigue', 'dizziness', 'rash', 'fever'],
        100,
        p=[0.25, 0.2, 0.15, 0.15, 0.1, 0.15]
    ).tolist()

    # Every word of 3+ letters counts as a reported term; `or ""` guards None.
    user_terms = [word.lower() for word in re.findall(r'\b[a-zA-Z]{3,}\b', adverse_event_text or "")]

    all_events = base_events + user_terms

    events_df = pd.DataFrame(all_events, columns=['Adverse_Event'])
    event_counts = events_df['Adverse_Event'].value_counts().nlargest(10).sort_values(ascending=False)

    results_df = event_counts.reset_index()
    results_df.columns = ['Adverse_Event', 'Frequency']

    log = f"✅ Analyzed {len(all_events)} total event reports. Identified {len(event_counts)} unique adverse events for plotting.\n"

    # Create Bokeh Plot
    source = ColumnDataSource(results_df)
    # Reversed so the most frequent event is drawn at the top of the chart.
    y_range = results_df['Adverse_Event'].tolist()[::-1]

    hover = HoverTool(tooltips=[("Event", "@Adverse_Event"), ("Frequency", "@Frequency")])

    p = figure(
        y_range=y_range, height=450, title="Top 10 Reported Adverse Events",
        sizing_mode='stretch_width', tools="pan,wheel_zoom,box_zoom,reset,save",
    )
    p.add_tools(hover)

    p.hbar(
        y='Adverse_Event', right='Frequency', source=source, height=0.7,
        color='#00A0FF', line_color='white', legend_label="Event Frequency"
    )

    # Style the plot for a dark theme
    p.background_fill_color = "#1C1C1C"
    p.border_fill_color = "#1C1C1C"
    p.outline_line_color = '#333333'
    p.title.text_color = "white"
    p.title.text_font_size = '16pt'
    p.title.align = "center"
    p.xaxis.axis_label = "Frequency Count"
    p.yaxis.axis_label = "Adverse Event"
    p.axis.axis_label_text_color = "#CCCCCC"
    p.axis.axis_label_text_font_size = "12pt"
    p.axis.major_label_text_color = "#AAAAAA"
    p.axis.major_label_text_font_size = "10pt"
    p.grid.grid_line_alpha = 0.3
    p.grid.grid_line_color = "#444444"
    p.x_range.start = 0
    p.legend.location = "top_right"
    p.legend.background_fill_color = "#2A2A2A"
    p.legend.background_fill_alpha = 0.7
    p.legend.border_line_color = "#444444"
    p.legend.label_text_color = "white"

    return results_df, p, log
|
628 |
+
|
629 |
+
def get_ethical_framework():
    """Summarise the ethical AI principles applied across the pipeline.

    Returns:
        tuple: ``(framework_df, log)`` where ``framework_df`` has one row per
        principle with the columns ``Principle`` and
        ``Implementation Strategy``, and ``log`` is a status message.
    """
    framework = {
        'Principle': ['Beneficence', 'Non-maleficence', 'Fairness', 'Transparency'],
        'Implementation Strategy': [
            'AI models prioritize patient outcomes and clinical efficacy.',
            'Toxicity prediction and pharmacovigilance models aim to minimize patient harm.',
            'Algorithms are audited for demographic bias in training data and predictions.',
            'Model cards and SHAP values are provided for key decision-making processes.'
        ],
    }
    return pd.DataFrame(framework), "✅ Generated Ethical AI Framework summary."
|
638 |
+
|
639 |
+
# --- 3. Streamlit UI Layout ---
|
640 |
+
|
641 |
+
# Initialize session state variables
# Only keys that are missing are set, so values already stored are preserved.
for _state_key, _state_default in (
    ('active_tab', "Phase 1: Target Identification"),
    ('log_p1', "Status logs will appear here."),
    ('log_p2', "Status logs will appear here."),
    ('log_p3', "Status logs will appear here."),
    ('log_p4', "Status logs will appear here."),
    ('results_p1', {}),
    ('results_p2', {}),
    ('results_p3', {}),
    ('results_p4', {}),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
|
651 |
|
652 |
+
# --- Header ---
st.title("🔬 AI-Powered Drug Discovery Pipeline")
st.markdown("An integrated application demonstrating a four-phase computational drug discovery workflow.")

# --- Main Tabs for Each Phase ---
# One tab per pipeline phase, filled in by the `with tabN:` sections that follow.
tab1, tab2, tab3, tab4 = st.tabs([
    "**Phase 1:** Target Identification",
    "**Phase 2:** Hit Discovery & ADMET",
    "**Phase 3:** Lead Optimization",
    "**Phase 4:** Pre-clinical & RWE"
])
|
663 |
|
664 |
+
# --- Phase 1: Target Identification ---
# Collects a PDB ID, a protein ID and a compound selection, runs the Phase 1
# helpers on demand, and renders whatever the last run stored in session state.
with tab1:
    st.header("Phase 1: Target Identification & Initial Analysis")
    st.markdown("""
    In this initial phase, we identify and analyze a biological target (e.g., a protein) implicated in a disease.
    We fetch its 3D structure and sequence data, then evaluate a set of initial compounds for their drug-like properties.
    """)

    st.subheader("Inputs & Controls")

    pdb_id_input = st.text_input("Enter PDB ID (e.g., 2HU4 for Neuraminidase)", "2HU4")
    protein_id_input = st.text_input("Enter NCBI Protein ID (e.g., P03468 for Neuraminidase)", "P03468")

    st.markdown("---")
    st.write("**Analyze Sample Compounds:**")
    sample_molecules = create_sample_molecules()
    selected_molecules = st.multiselect(
        "Select from known drugs:",
        options=list(sample_molecules.keys()),
        # Keep only defaults that exist in the options, so a change to the
        # sample set cannot produce an invalid default selection.
        default=[name for name in ("Oseltamivir", "Aspirin") if name in sample_molecules]
    )

    if st.button("🚀 Run Phase 1 Analysis", key="run_p1"):
        with st.spinner("Fetching data and calculating properties..."):
            full_log = "--- Phase 1 Analysis Started ---\n"

            pdb_data, log_pdb = fetch_pdb_structure(pdb_id_input)
            full_log += log_pdb
            # NOTE(review): unlike the other helpers, this result is used
            # directly as a log string; confirm fetch_fasta_sequence returns a str.
            log_fasta = fetch_fasta_sequence(protein_id_input)
            full_log += log_fasta

            smiles_to_analyze = {name: sample_molecules[name] for name in selected_molecules}
            properties_df, log_props = calculate_molecular_properties(smiles_to_analyze)
            full_log += log_props

            analysis_df, display_df, log_likeness = assess_drug_likeness(properties_df)
            full_log += log_likeness

            protein_view_html, log_3d = visualize_protein_3d(pdb_data, title=f"PDB: {pdb_id_input}")
            full_log += log_3d

            dashboard_plot, log_dash = plot_properties_dashboard(analysis_df)
            full_log += log_dash

            full_log += "\n--- Phase 1 Analysis Complete ---"
            st.session_state.log_p1 = full_log

            st.session_state.results_p1 = {
                # Remember which ID was analysed so the results heading stays
                # correct if the text box is edited without re-running.
                'pdb_id': pdb_id_input,
                'pdb_data': pdb_data,
                'protein_view': protein_view_html,
                'properties_df': display_df,
                'dashboard': dashboard_plot
            }

    st.text_area("Status & Logs", st.session_state.log_p1, height=200, key="log_p1_area")

    st.subheader("Results")
    if not st.session_state.results_p1:
        st.info("Click 'Run Phase 1 Analysis' to generate and display results.")
    else:
        res1 = st.session_state.results_p1
        p1_tabs = st.tabs(["Protein Structure", "Compound Properties Dashboard"])

        with p1_tabs[0]:
            # Fall back to the current input for results stored without 'pdb_id'.
            st.subheader(f"3D Structure for PDB ID: {res1.get('pdb_id', pdb_id_input)}")
            if res1.get('protein_view'):
                st.components.v1.html(res1['protein_view'], height=600, scrolling=False)
            else:
                st.warning("Could not display 3D structure. Check PDB ID and logs.")

        with p1_tabs[1]:
            st.subheader("Physicochemical Properties Analysis")
            if res1.get('dashboard'):
                st.bokeh_chart(res1['dashboard'], use_container_width=True)
            st.dataframe(res1.get('properties_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
739 |
+
|
740 |
+
|
741 |
+
# --- Phase 2: Hit Discovery & ADMET ---
# Screens the Phase 2 library, merges the screening and ADMET tables, and
# renders a protein-ligand view for the chosen PDB entry.
with tab2:
    st.header("Phase 2: Virtual Screening & Early ADMET")
    st.markdown("""
    This phase simulates a virtual screening process to identify 'hits' from a larger library of compounds.
    We predict their binding affinity to the target and assess their basic ADMET (Absorption, Distribution,
    Metabolism, Excretion, Toxicity) profiles.
    """)

    st.subheader("Inputs & Controls")

    p2_molecules = get_phase2_molecules()
    st.info(f"A library of {len(p2_molecules)} compounds is ready for screening.")

    p2_pdb_id = st.text_input("Enter PDB ID for Interaction (e.g., 2HU4)", "2HU4", key="p2_pdb")
    p2_ligand_resn = st.text_input("Ligand Residue Name in PDB (e.g., G39 for Oseltamivir)", "G39", key="p2_ligand")

    if st.button("🚀 Run Phase 2 Analysis", key="run_p2"):
        with st.spinner("Running virtual screening and ADMET predictions..."):
            full_log = "--- Phase 2 Analysis Started ---\n"

            screening_df, log_screen = simulate_virtual_screening(p2_molecules)
            full_log += log_screen
            admet_df, log_admet = predict_admet_properties(p2_molecules)
            full_log += log_admet

            # Both tables are joined on their shared "Molecule" column.
            merged_df = pd.merge(screening_df, admet_df, on="Molecule")

            pdb_data, log_pdb_p2 = fetch_pdb_structure(p2_pdb_id)
            full_log += log_pdb_p2

            interaction_view, log_interact = visualize_protein_ligand_interaction(pdb_data, p2_pdb_id, p2_ligand_resn)
            full_log += log_interact

            full_log += "\n--- Phase 2 Analysis Complete ---"
            st.session_state.log_p2 = full_log
            st.session_state.results_p2 = {
                # Remember the analysed inputs so the results heading stays
                # correct if the text boxes are edited without re-running.
                'pdb_id': p2_pdb_id,
                'ligand_resn': p2_ligand_resn,
                'merged_df': merged_df,
                'interaction_view': interaction_view
            }

    st.text_area("Status & Logs", st.session_state.log_p2, height=200, key="log_p2_area")

    st.subheader("Results")
    if not st.session_state.results_p2:
        st.info("Click 'Run Phase 2 Analysis' to generate and display results.")
    else:
        res2 = st.session_state.results_p2
        p2_tabs = st.tabs(["Screening & ADMET Results", "Protein-Ligand Interaction"])

        with p2_tabs[0]:
            st.subheader("Virtual Screening & Early ADMET Predictions")
            st.dataframe(res2.get('merged_df', pd.DataFrame()), use_container_width=True, hide_index=True)

        with p2_tabs[1]:
            # Fall back to the current inputs for results stored without these keys.
            shown_pdb_id = res2.get('pdb_id', p2_pdb_id)
            shown_ligand = res2.get('ligand_resn', p2_ligand_resn)
            st.subheader(f"Simulated Interaction for PDB {shown_pdb_id} with Ligand {shown_ligand}")
            if res2.get('interaction_view'):
                st.components.v1.html(res2['interaction_view'], height=700, scrolling=False)
            else:
                st.warning("Could not display interaction view. Check inputs and logs.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
801 |
|
802 |
+
# --- Phase 3: Lead Optimization ---
# Computes detailed properties and a toxicity prediction for the selected
# leads, then renders the merged table and one structure view per compound.
with tab3:
    st.header("Phase 3: Lead Compound Optimization")
    st.markdown("""
    In lead optimization, promising 'hit' compounds are refined to improve their efficacy and safety.
    Here, we analyze a few selected lead candidates, perform more detailed property calculations,
    and predict their toxicity risk using a simulated machine learning model.
    """)

    st.subheader("Inputs & Controls")

    p3_molecules = get_phase3_molecules()
    selected_leads = st.multiselect(
        "Select lead compounds to optimize:",
        options=list(p3_molecules.keys()),
        # Keep only defaults that exist in the options, so a change to the
        # lead set cannot produce an invalid default selection.
        default=[name for name in ('Oseltamivir', 'Remdesivir') if name in p3_molecules]
    )

    if st.button("🚀 Run Phase 3 Analysis", key="run_p3"):
        if not selected_leads:
            # With nothing selected there is nothing to analyse. Skipping the
            # run also avoids merging on a "Compound" column that an empty
            # table may not have (presumably; depends on the helper's output).
            st.warning("Select at least one lead compound before running the analysis.")
        else:
            with st.spinner("Analyzing lead compounds and predicting toxicity..."):
                full_log = "--- Phase 3 Analysis Started ---\n"

                smiles_to_analyze_p3 = {name: p3_molecules[name] for name in selected_leads}

                comp_props_df, log_comp = calculate_comprehensive_properties(smiles_to_analyze_p3)
                full_log += log_comp

                toxicity_df, log_tox = predict_toxicity(comp_props_df)
                full_log += log_tox

                final_df = pd.merge(comp_props_df, toxicity_df, on="Compound")

                visuals = {}
                for name, smiles in smiles_to_analyze_p3.items():
                    html_view, log_vis = visualize_molecule_2d_3d(smiles, name)
                    visuals[name] = html_view
                    full_log += log_vis

                full_log += "\n--- Phase 3 Analysis Complete ---"
                st.session_state.log_p3 = full_log
                st.session_state.results_p3 = {
                    'final_df': final_df,
                    'visuals': visuals
                }

    st.text_area("Status & Logs", st.session_state.log_p3, height=200, key="log_p3_area")

    st.subheader("Results")
    if not st.session_state.results_p3:
        st.info("Click 'Run Phase 3 Analysis' to generate and display results.")
    else:
        res3 = st.session_state.results_p3
        st.subheader("Lead Compound Analysis & Toxicity Prediction")
        st.dataframe(res3.get('final_df', pd.DataFrame()), use_container_width=True, hide_index=True)

        st.subheader("2D & 3D Molecular Structures")
        # Only the rendered HTML is needed here; the compound names are unused.
        for visual_html in res3.get('visuals', {}).values():
            st.components.v1.html(visual_html, height=430, scrolling=False)
|
860 |
+
|
861 |
+
|
862 |
+
# --- Phase 4: Pre-clinical & RWE ---
# Runs the adverse-event analysis on the entered text and shows it alongside
# the regulatory and ethical framework summaries.
with tab4:
    st.header("Phase 4: Simulated Pre-clinical & Real-World Evidence (RWE)")
    st.markdown("""
    This final phase simulates post-market analysis. We analyze text data for adverse events (pharmacovigilance)
    and present documentation related to the AI models and ethical frameworks that would be required for regulatory submission.
    """)

    st.subheader("Inputs & Controls")

    rwd_input = st.text_area(
        "Enter simulated adverse event report text:",
        "Patient reports include instances of headache, severe nausea, and occasional skin rash. Some noted dizziness after taking the medication.",
        height=150
    )

    if st.button("🚀 Run Phase 4 Analysis", key="run_p4"):
        with st.spinner("Analyzing real-world data and generating reports..."):
            full_log = "--- Phase 4 Analysis Started ---\n"

            # The regulatory and ethics log messages carry no trailing
            # newline, so normalise each entry to exactly one; otherwise they
            # run together on a single line in the log box.
            reg_df, log_reg = get_regulatory_summary()
            full_log += log_reg.rstrip("\n") + "\n"

            eth_df, log_eth = get_ethical_framework()
            full_log += log_eth.rstrip("\n") + "\n"

            rwd_df, plot_bar, log_rwd = simulate_rwd_analysis(rwd_input)
            full_log += log_rwd.rstrip("\n") + "\n"
            full_log += "\n--- Phase 4 Analysis Complete ---"
            st.session_state.log_p4 = full_log

            st.session_state.results_p4 = {
                'rwd_df': rwd_df,
                'plot_bar': plot_bar,
                'reg_df': reg_df,
                'eth_df': eth_df
            }

    st.text_area("Status & Logs", st.session_state.log_p4, height=200, key="log_p4_area")

    st.subheader("Results")
    if not st.session_state.results_p4:
        st.info("Click 'Run Phase 4 Analysis' to generate and display results.")
    else:
        res4 = st.session_state.results_p4
        p4_tabs = st.tabs(["Pharmacovigilance Analysis", "Regulatory & Ethical Frameworks"])

        with p4_tabs[0]:
            st.subheader("Simulated Adverse Event Analysis")
            if res4.get('plot_bar'):
                st.bokeh_chart(res4['plot_bar'], use_container_width=True)
            st.dataframe(res4.get('rwd_df', pd.DataFrame()), use_container_width=True, hide_index=True)

        with p4_tabs[1]:
            st.subheader("AI/ML Model Regulatory Summary")
            st.dataframe(res4.get('reg_df', pd.DataFrame()), use_container_width=True, hide_index=True)

            st.subheader("Ethical AI Framework")
            st.dataframe(res4.get('eth_df', pd.DataFrame()), use_container_width=True, hide_index=True)
|
|
|
|