Spaces:

alidenewade
/

drug-discovery-pipeline

Running

App Files Community

alidenewade committed 27 days ago

Commit

430eb42

verified ·

1 Parent(s): 7198ea6

Update app.py

Browse files

Files changed (1) hide show

app.py +453 -142

app.py CHANGED Viewed

@@ -20,7 +20,6 @@ from sklearn.model_selection import train_test_split
 # 3D Visualization
 import py3Dmol
-from stmol import showmol # Import the new component
 # Bokeh plotting
 from bokeh.plotting import figure
@@ -150,28 +149,29 @@ def fetch_fasta_sequence(protein_id: str):
         log += f"❌ An error occurred while fetching FASTA data: {e}\n"
         return log
-# REFACTORED: This function now returns a py3Dmol viewer object, not HTML
-def visualize_protein_3d(pdb_data: str):
     """
     Generates an interactive 3D protein visualization using py3Dmol.
     """
     if not pdb_data:
         return None, "Cannot generate 3D view: No PDB data provided."
     try:
-        viewer = py3Dmol.view(width=700, height=600)
         viewer.setBackgroundColor('#1C1C1C')
         viewer.addModel(pdb_data, "pdb")
         viewer.setStyle({'cartoon': {'color': 'spectrum', 'thickness': 0.8}})
         viewer.addSurface(py3Dmol.VDW, {'opacity': 0.3, 'color': 'lightblue'})
         viewer.zoomTo()
-        log = f"✅ Generated 3D visualization object.\n"
-        return viewer, log
     except Exception as e:
         return None, f"❌ 3D visualization error: {e}"
 def create_sample_molecules():
     """
     Returns a dictionary of sample molecules in Name:SMILES format.
     """
     return {
         "Oseltamivir (Influenza)": "CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C",
@@ -181,9 +181,9 @@ def create_sample_molecules():
         "Atorvastatin (Cholesterol)": "CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1", # Lipitor
         "Metformin (Diabetes)": "CN(C)C(=N)N=C(N)N",
         "Loratadine (Antihistamine)": "CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1",
-        "Imatinib (Gleevec - Cancer)": "Cc1ccc(NC(=O)c2cnc(C)s2)cc1-c1cnc(Nc2ccc(CN)cc2)nc1",
-        "Amlodipine (Hypertension)": "CCC(COC(=O)c1cnc(C)c(c1C)C(=O)OC)c1ccc(Cl)cc1",
-        "Rosuvastatin (Cholesterol)": "CC(C)c1ccc(cc1)S(=O)(=O)Nc1ncc(C)c(C(=O)O[C@H](C)[C@H](O)CC(=O)O)c1C",
     }
 def calculate_molecular_properties(smiles_dict: dict):
@@ -196,9 +196,13 @@ def calculate_molecular_properties(smiles_dict: dict):
         mol = Chem.MolFromSmiles(smiles)
         if mol:
             props = {
-                'Molecule': name, 'SMILES': smiles, 'MW': Descriptors.MolWt(mol),
-                'LogP': Descriptors.MolLogP(mol), 'HBD': Descriptors.NumHDonors(mol),
-                'HBA': Descriptors.NumHAcceptors(mol), 'TPSA': Descriptors.TPSA(mol),
                 'RotBonds': Descriptors.NumRotatableBonds(mol),
             }
             properties.append(props)
@@ -212,6 +216,7 @@ def calculate_molecular_properties(smiles_dict: dict):
 def assess_drug_likeness(df: pd.DataFrame):
     """
     Assesses drug-likeness based on Lipinski's Rule of Five.
     """
     if df.empty:
         return pd.DataFrame(), pd.DataFrame(), "Cannot assess drug-likeness: No properties data."
@@ -222,6 +227,7 @@ def assess_drug_likeness(df: pd.DataFrame):
     analysis_df['HBD_OK'] = analysis_df['HBD'] <= 5
     analysis_df['HBA_OK'] = analysis_df['HBA'] <= 10
     analysis_df['Lipinski_Violations'] = (~analysis_df[['MW_OK', 'LogP_OK', 'HBD_OK', 'HBA_OK']]).sum(axis=1)
     analysis_df['Drug_Like'] = analysis_df['Lipinski_Violations'] <= 1
     display_df = df.copy()
@@ -229,10 +235,13 @@ def assess_drug_likeness(df: pd.DataFrame):
     display_df['Drug_Like'] = analysis_df['Drug_Like'].apply(lambda x: '✅ Yes' if x else '❌ No')
     log = "✅ Assessed drug-likeness using Lipinski's Rule of Five.\n"
     return analysis_df, display_df, log
 def plot_properties_dashboard(df: pd.DataFrame):
-    """Creates a 2x2 dashboard of molecular property visualizations using Bokeh."""
     if df.empty or 'Drug_Like' not in df.columns:
         return None, "Cannot plot: No analysis data or 'Drug_Like' column missing."
@@ -251,56 +260,89 @@ def plot_properties_dashboard(df: pd.DataFrame):
     ])
     plot_config = {
-        'sizing_mode': 'scale_width', 'aspect_ratio': 1, 'background_fill_color': None,
-        'border_fill_color': None, 'outline_line_color': '#333333', 'min_border': 50
     }
     def style_plot(p, x_label, y_label, title):
-        p.title.text, p.title.text_color, p.title.text_font_size, p.title.text_font_style = title, '#FFFFFF', '14pt', 'bold'
-        p.xaxis.axis_label, p.yaxis.axis_label, p.axis.axis_label_text_color = x_label, y_label, '#CCCCCC'
-        p.axis.axis_label_text_font_size, p.axis.major_label_text_color = '11pt', '#AAAAAA'
-        p.grid.grid_line_color, p.grid.grid_line_alpha = '#2A2A2A', 0.3
         if p.legend:
-            p.legend.location, p.legend.background_fill_color, p.legend.border_line_color = "top_right", '#1A1A1A', '#444444'
-            p.legend.label_text_color, p.legend.click_policy = '#FFFFFF', "mute"
         return p
-    p1 = figure(tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
-    p1.scatter('MW', 'LogP', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8)
-    p1.line([500, 500], [df['LogP'].min()-0.5, df['LogP'].max()+0.5], line_dash="dashed", line_color="#FFD700", line_width=2)
-    p1.line([df['MW'].min()-50, df['MW'].max()+50], [5, 5], line_dash="dashed", line_color="#FFD700", line_width=2)
     style_plot(p1, "Molecular Weight (Da)", "LogP", "Lipinski Rule: MW vs LogP")
-    p2 = figure(tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
-    p2.scatter('HBD', 'HBA', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8)
-    p2.line([5, 5], [df['HBA'].min()-1, df['HBA'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2)
-    p2.line([df['HBD'].min()-1, df['HBD'].max()+1], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2)
     style_plot(p2, "Hydrogen Bond Donors", "Hydrogen Bond Acceptors", "Lipinski Rule: Hydrogen Bonding")
-    p3 = figure(tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
-    p3.scatter('TPSA', 'RotBonds', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8)
-    p3.line([140, 140], [df['RotBonds'].min()-1, df['RotBonds'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2)
-    p3.line([df['TPSA'].min()-10, df['TPSA'].max()+10], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2)
     style_plot(p3, "Topological Polar Surface Area (Å²)", "Rotatable Bonds", "Drug Permeability Indicators")
     p4_config = plot_config.copy()
-    p4_config['tools'], p4_config.update({'x_range': (-1.0, 1.0), 'y_range': (-1.0, 1.0)}) = "hover", {}
     p4 = figure(title="Drug-Likeness Distribution", **p4_config)
     counts = df['Category'].value_counts()
-    data = pd.DataFrame(counts).reset_index()
-    data.columns = ['category', 'value']
     data['angle'] = data['value']/data['value'].sum() * 2*pi
-    data['color'] = [colors[0] if cat == 'Drug-Like' else colors[1] for cat in data['category']]
     data['percentage'] = (data['value'] / data['value'].sum() * 100).round(1)
-    drug_like_percentage = (df['Drug_Like'].sum() / len(df) * 100) if len(df) > 0 else 0
     wedge_renderer = p4.annular_wedge(x=0, y=0, inner_radius=0.25, outer_radius=0.45,
                      start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
-                     line_color="white", fill_color='color', legend_field='category', source=data)
-    p4.add_tools(HoverTool(tooltips=[("Category", "@category"), ("Count", "@value"), ("Percentage", "@percentage{%0.1f}%%")], renderers=[wedge_renderer]))
-    p4.text(0, 0, text=[f"{len(df)}\nCompounds\n({drug_like_percentage:.1f}% Drug-Like)"],
             text_align="center", text_baseline="middle", text_color="white", text_font_size="10pt", text_font_style="bold")
     style_plot(p4, "", "", "Compound Classification")
@@ -312,18 +354,32 @@ def plot_properties_dashboard(df: pd.DataFrame):
 # ===== Phase 2 Functions =====
 def get_phase2_molecules():
     return {
-        'Paracetamol (Analgesic)': 'CC(=O)Nc1ccc(O)cc1', 'Ibuprofen (Pain/Inflammation)': 'CC(C)Cc1ccc(C(C)C(=O)O)cc1',
-        'Aspirin (Pain/Antiplatelet)': 'CC(=O)Oc1ccccc1C(=O)O', 'Naproxen (Pain/Inflammation)': 'C[C@H](C(=O)O)c1ccc2cc(OC)ccc2c1',
-        'Diazepam (Anxiolytic)': 'CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc12', 'Metformin (Diabetes)': 'CN(C)C(=N)N=C(N)N',
-        'Loratadine (Antihistamine)': 'CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1', 'Morphine (Opioid Analgesic)': 'C[N@]1CC[C@]23c4c5ccc(O)c4O[C@H]2[C@@H](O)C=C[C@H]3[C@H]1C5',
-        'Cetirizine (Antihistamine)': 'O=C(O)COCCOc1ccc(cc1)C(c1ccccc1)N1CCN(CC1)CCO', 'Fluoxetine (Antidepressant)': 'CNCCC(c1ccccc1)Oc1ccc(C(F)(F)F)cc1',
-        'Amoxicillin (Antibiotic)': 'C[C@@]1([C@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](N)c3ccc(O)cc3)C(=O)O)C', 'Atorvastatin (Cholesterol)': 'CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1',
-        'Ciprofloxacin (Antibiotic)': 'O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc12', 'Warfarin (Anticoagulant)': 'O=C(c1ccccc1)C(c1oc2ccccc2c1=O)C',
-        'Furosemide (Diuretic)': 'O=C(O)c1cc(Cl)c(NC2CO2)c(c1)S(=O)(=O)N', 'Sildenafil (Erectile Dysfunction)': 'CCCC1=NN(C)C(=NC1=O)c1cc(N2CCN(C)CC2)c(OC)cc1S(=O)(=O)C',
-        'Omeprazole (GERD)': 'COc1ccc(C)c(c1NC(=O)c1cn(Cc2ccc(OC)cc2)cn1)OC', 'Losartan (Hypertension)': 'Cc1cnc(n1C)c1ccc(cc1)-c1ccccc1COC(=O)c1ccccc1',
     }
 def simulate_virtual_screening(smiles_dict: dict):
     np.random.seed(42)
     scores = np.random.uniform(2.0, 9.8, len(smiles_dict))
@@ -348,25 +404,83 @@ def predict_admet_properties(smiles_dict: dict):
     log += f"✅ Predicted ADMET properties for {len(df)} molecules.\n"
     return df, log
-# REFACTORED: This function now returns a 2D image (as bytes) and a 3D viewer object
-@st.cache_data
-def generate_molecule_visuals(smiles: str):
-    """Generates a 2D image and a 3D py3Dmol viewer object for a single molecule."""
     try:
         mol = Chem.MolFromSmiles(smiles)
-        if not mol: return None, None, "Invalid SMILES"
-        # 2D SVG Image
         drawer = Draw.rdMolDraw2D.MolDraw2DSVG(400, 300)
         drawer.drawOptions().clearBackground = False
-        drawer.drawOptions().backgroundColour = (0.11, 0.11, 0.11)
         drawer.DrawMolecule(mol)
         drawer.FinishDrawing()
         svg_2d = drawer.GetDrawingText().replace('svg:', '')
-        # Simple color replacement for dark theme
-        svg_2d = svg_2d.replace('black', 'white')
-        # 3D Viewer Object
         mol_3d = Chem.AddHs(mol)
         AllChem.EmbedMolecule(mol_3d, randomSeed=42)
         AllChem.MMFFOptimizeMolecule(mol_3d)
@@ -377,48 +491,75 @@ def generate_molecule_visuals(smiles: str):
         viewer.addModel(sdf_data, "sdf")
         viewer.setStyle({'stick': {}, 'sphere': {'scale': 0.25}})
         viewer.zoomTo()
-        log = f"✅ Generated 2D/3D views.\n"
-        return svg_2d, viewer, log
     except Exception as e:
-        return None, None, f"❌ Error visualizing molecule: {e}"
-# REFACTORED: This function now returns a py3Dmol viewer object, not HTML
 def visualize_protein_ligand_interaction(pdb_data: str, pdb_id: str, ligand_resn: str):
     """
     Generates a protein-ligand interaction visualization using py3Dmol.
     """
     if not pdb_data:
         return None, "Cannot generate interaction view: No PDB data provided."
     try:
-        viewer = py3Dmol.view(width=700, height=650)
         viewer.setBackgroundColor('#1C1C1C')
         viewer.addModel(pdb_data, "pdb")
         viewer.setStyle({'cartoon': {'color': 'lightblue', 'opacity': 0.8}})
         if ligand_resn:
             viewer.addStyle({'resn': ligand_resn}, {'stick': {'colorscheme': 'greenCarbon', 'radius': 0.2}})
             viewer.addStyle({'resn': ligand_resn}, {'sphere': {'scale': 0.3, 'colorscheme': 'greenCarbon'}})
-            viewer.addSurface(py3Dmol.VDW, {'opacity': 0.2, 'color': 'white'}, {'resn': ligand_resn})
-            viewer.zoomTo({'resn': ligand_resn})
-        else:
-            viewer.zoomTo()
         log = f"✅ Generated protein-ligand interaction view for {pdb_id} with ligand {ligand_resn}."
-        return viewer, log
     except Exception as e:
         return None, f"❌ Interaction visualization error: {e}"
 # ===== Phase 3 Functions =====
 def get_phase3_molecules():
     return {
         'Oseltamivir (Influenza)': 'CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C',
         'Aspirin (Pain/Antiplatelet)': 'CC(=O)OC1=CC=CC=C1C(=O)O',
         'Remdesivir (Antiviral)': 'CCC(CC)COC(=O)[C@@H](C)N[P@](=O)(OC[C@@H]1O[C@](C#N)([C@H]([C@@H]1O)O)C2=CC=C3N2N=CN=C3N)OC4=CC=CC=C4',
         'Penicillin G (Antibiotic)': 'CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C',
         "Imatinib (Gleevec - Cancer)": "Cc1ccc(NC(=O)c2cnc(C)s2)cc1-c1cnc(Nc2ccc(CN)cc2)nc1",
-        "Sorafenib (Kinase Inhibitor)": "Clc1cccc(Cl)c1OC(=O)Nc1ccc(nc1)NC(=O)C(C)(C)C",
         "Venetoclax (BCL-2 Inhibitor)": "CC1(CCC(=C(C1)C2=CC=C(C=C2)Cl)CN3CCN(CC3)C4=CC(=C(C=C4)C(=O)NS(=O)(=O)C5=CC(=C(C=C5)NCC6CCOCC6)[N+](=O)[O-])OC7=CN=C8C(=C7)C=CN8)C",
-        "Dasatinib (Kinase Inhibitor)": "CC1=NC(=NC=C1SC2=NC=C(C=N2)C(=O)NC3=CC=CC(=C3)N)C(=O)O",
     }
 def calculate_comprehensive_properties(smiles_dict: dict):
@@ -463,48 +604,90 @@ def predict_toxicity(properties_df: pd.DataFrame):
 # ===== Phase 4 Functions =====
 def get_regulatory_summary():
     summary = {'Component': ['Data Governance', 'Model Architecture', 'Model Validation', 'Interpretability'],
-               'Description': ['Data sourced from ChEMBL, PDB, GISAID.',
-                               'GCN (Target ID), Random Forest (ADMET), K-Means (Patient Stratification).',
                                'ADMET Model validated with AUC-ROC > 0.85 on an independent test set.',
                                'SHAP used for patient stratification model outputs.']}
     return pd.DataFrame(summary), "✅ Generated AI/ML documentation summary."
 def simulate_rwd_analysis(adverse_event_text):
     np.random.seed(42)
     base_events = list(np.random.choice(
-        ['headache', 'nausea', 'fatigue', 'dizziness', 'rash', 'fever', 'diarrhea'], 100,
-        p=[0.2, 0.15, 0.12, 0.12, 0.1, 0.08, 0.23]
     ))
     user_terms = [word.lower() for word in re.findall(r'\b[a-zA-Z]{3,}\b', adverse_event_text)]
     all_events = base_events + user_terms
-    event_counts = pd.Series(all_events).value_counts().nlargest(10)
     results_df = event_counts.reset_index()
     results_df.columns = ['Adverse_Event', 'Frequency']
-    log = f"✅ Analyzed {len(all_events)} total event reports.\n"
     source = ColumnDataSource(results_df)
-    p = figure(y_range=results_df['Adverse_Event'].tolist()[::-1], height=450, title="Top 10 Reported Adverse Events",
-               sizing_mode='stretch_width', tools="pan,wheel_zoom,box_zoom,reset,save")
-    p.add_tools(HoverTool(tooltips=[("Event", "@Adverse_Event"),("Frequency", "@Frequency")]))
-    p.hbar(y='Adverse_Event', right='Frequency', source=source, height=0.7, color='#00A0FF', line_color='white')
     p.background_fill_color = "#1C1C1C"
     p.border_fill_color = "#1C1C1C"
     p.title.text_color = "white"
-    p.axis.axis_label_text_color, p.axis.major_label_text_color = "#CCCCCC", "#AAAAAA"
-    p.grid.grid_line_alpha, p.grid.grid_line_color, p.x_range.start = 0.3, "#444444", 0
     return results_df, p, log
 def get_ethical_framework():
     framework = {'Principle': ['Beneficence', 'Non-maleficence', 'Fairness', 'Transparency'],
                  'Implementation Strategy': [
                      'AI models prioritize patient outcomes and clinical efficacy.',
-                     'Toxicity prediction models aim to minimize patient harm.',
-                     'Algorithms are audited for demographic bias.',
                      'Model cards and SHAP values are provided for key decision-making processes.'
                  ]}
     return pd.DataFrame(framework), "✅ Generated Ethical AI Framework summary."
 # --- 3. Streamlit UI Layout ---
 if 'log_p1' not in st.session_state: st.session_state.log_p1 = "Status logs will appear here."
 if 'log_p2' not in st.session_state: st.session_state.log_p2 = "Status logs will appear here."
 if 'log_p3' not in st.session_state: st.session_state.log_p3 = "Status logs will appear here."
@@ -514,179 +697,307 @@ if 'results_p2' not in st.session_state: st.session_state.results_p2 = {}
 if 'results_p3' not in st.session_state: st.session_state.results_p3 = {}
 if 'results_p4' not in st.session_state: st.session_state.results_p4 = {}
 st.title("🔬 AI-Powered Drug Discovery Pipeline")
 st.markdown("An integrated application demonstrating a four-phase computational drug discovery workflow.")
 tab1, tab2, tab3, tab4 = st.tabs([
-    "**Phase 1:** Target Identification", "**Phase 2:** Hit Discovery & ADMET",
-    "**Phase 3:** Lead Optimization", "**Phase 4:** Pre-clinical & RWE"
 ])
 # --- Phase 1: Target Identification ---
 with tab1:
     st.header("Phase 1: Target Identification & Initial Analysis")
-    pdb_options = {"Neuraminidase (2HU4)": "2HU4", "KRAS G12D (7XKJ)": "7XKJ", "SARS-CoV-2 Mpro (8HUR)": "8HUR", "EGFR Kinase (1M17)": "1M17"}
-    protein_options = {"Neuraminidase (P03468)": "P03468", "KRAS (P01116)": "P01116", "SARS-CoV-2 Mpro (P0DTD1)": "P0DTD1", "EGFR (P00533)": "P00533"}
     selected_pdb_name = st.selectbox("Select PDB ID:", options=list(pdb_options.keys()), index=0)
     pdb_id_input = pdb_options[selected_pdb_name]
     selected_protein_name = st.selectbox("Select NCBI Protein ID:", options=list(protein_options.keys()), index=0)
     protein_id_input = protein_options[selected_protein_name]
     st.markdown("---")
     sample_molecules = create_sample_molecules()
     selected_molecules = st.multiselect(
-        "Select from known drugs:", options=list(sample_molecules.keys()),
-        default=["Oseltamivir (Influenza)", "Aspirin (Pain/Inflammation)", "Imatinib (Gleevec - Cancer)"]
     )
     if st.button("🚀 Run Phase 1 Analysis", key="run_p1"):
-        with st.spinner("Running Phase 1..."):
             full_log = "--- Phase 1 Analysis Started ---\n"
             pdb_data, log_pdb = fetch_pdb_structure(pdb_id_input)
             full_log += log_pdb
-            full_log += fetch_fasta_sequence(protein_id_input)
             smiles_to_analyze = {name: sample_molecules[name] for name in selected_molecules}
             properties_df, log_props = calculate_molecular_properties(smiles_to_analyze)
             full_log += log_props
             analysis_df, display_df, log_likeness = assess_drug_likeness(properties_df)
             full_log += log_likeness
-            protein_viewer, log_3d = visualize_protein_3d(pdb_data)
             full_log += log_3d
             dashboard_plot, log_dash = plot_properties_dashboard(analysis_df)
             full_log += log_dash
-            st.session_state.log_p1 = full_log + "\n--- Phase 1 Analysis Complete ---"
-            st.session_state.results_p1 = {'protein_viewer': protein_viewer, 'properties_df': display_df, 'dashboard': dashboard_plot}
-    st.text_area("Status & Logs", st.session_state.log_p1, height=200)
-    if st.session_state.results_p1:
         res1 = st.session_state.results_p1
         p1_tabs = st.tabs(["Protein Structure", "Compound Properties Dashboard"])
         with p1_tabs[0]:
             st.subheader(f"3D Structure for PDB ID: {pdb_id_input}")
-            if res1.get('protein_viewer'):
-                showmol(res1['protein_viewer'], height=600, width=700)
         with p1_tabs[1]:
             st.subheader("Physicochemical Properties Analysis")
             st.dataframe(res1.get('properties_df', pd.DataFrame()), use_container_width=True, hide_index=True)
             if res1.get('dashboard'):
                 st.bokeh_chart(res1['dashboard'], use_container_width=True)
 # --- Phase 2: Hit Discovery & ADMET ---
 with tab2:
     st.header("Phase 2: Virtual Screening & Early ADMET")
     p2_molecules = get_phase2_molecules()
     st.info(f"A library of {len(p2_molecules)} compounds is ready for screening.")
     interaction_pdb_options = {
-        "Neuraminidase + Oseltamivir (2HU4)": {"pdb": "2HU4", "ligand": "G39"}, "KRAS G12C + MRTX-1133 (7XKJ)": {"pdb": "7XKJ", "ligand": "M13"},
-        "SARS-CoV-2 Mpro + Ensitrelvir (8HUR)": {"pdb": "8HUR", "ligand": "X77"}, "EGFR + Erlotinib (1M17)": {"pdb": "1M17", "ligand": "ERL"},
     }
-    selected_interaction_pdb_name = st.selectbox("Select PDB ID for Interaction:", options=list(interaction_pdb_options.keys()), index=0)
     p2_pdb_id = interaction_pdb_options[selected_interaction_pdb_name]["pdb"]
     p2_ligand_resn = interaction_pdb_options[selected_interaction_pdb_name]["ligand"]
     if st.button("🚀 Run Phase 2 Analysis", key="run_p2"):
-        with st.spinner("Running Phase 2..."):
             full_log = "--- Phase 2 Analysis Started ---\n"
             screening_df, log_screen = simulate_virtual_screening(p2_molecules)
             full_log += log_screen
             admet_df, log_admet = predict_admet_properties(p2_molecules)
             full_log += log_admet
             merged_df = pd.merge(screening_df, admet_df, on="Molecule")
             pdb_data, log_pdb_p2 = fetch_pdb_structure(p2_pdb_id)
             full_log += log_pdb_p2
-            interaction_viewer, log_interact = visualize_protein_ligand_interaction(pdb_data, p2_pdb_id, p2_ligand_resn)
             full_log += log_interact
-            st.session_state.log_p2 = full_log + "\n--- Phase 2 Analysis Complete ---"
-            st.session_state.results_p2 = {'merged_df': merged_df, 'interaction_viewer': interaction_viewer}
-    st.text_area("Status & Logs", st.session_state.log_p2, height=200)
-    if st.session_state.results_p2:
         res2 = st.session_state.results_p2
         p2_tabs = st.tabs(["Screening & ADMET Results", "Protein-Ligand Interaction"])
         with p2_tabs[0]:
             st.dataframe(res2.get('merged_df', pd.DataFrame()), use_container_width=True, hide_index=True)
         with p2_tabs[1]:
-            st.subheader(f"Interaction for PDB {p2_pdb_id} with Ligand {p2_ligand_resn}")
-            if res2.get('interaction_viewer'):
-                showmol(res2['interaction_viewer'], height=650, width=700)
 # --- Phase 3: Lead Optimization ---
 with tab3:
     st.header("Phase 3: Lead Compound Optimization")
     p3_molecules = get_phase3_molecules()
     selected_leads = st.multiselect(
-        "Select lead compounds to optimize:", options=list(p3_molecules.keys()),
-        default=['Oseltamivir (Influenza)', 'Remdesivir (Antiviral)', 'Imatinib (Gleevec - Cancer)']
     )
     if st.button("🚀 Run Phase 3 Analysis", key="run_p3"):
-        with st.spinner("Running Phase 3..."):
             full_log = "--- Phase 3 Analysis Started ---\n"
             smiles_to_analyze_p3 = {name: p3_molecules[name] for name in selected_leads}
             comp_props_df, log_comp = calculate_comprehensive_properties(smiles_to_analyze_p3)
             full_log += log_comp
             toxicity_df, log_tox = predict_toxicity(comp_props_df)
             full_log += log_tox
             final_df = pd.merge(comp_props_df, toxicity_df, on="Compound")
-            visuals = {name: generate_molecule_visuals(smiles) for name, smiles in smiles_to_analyze_p3.items()}
-            st.session_state.log_p3 = full_log + "\n--- Phase 3 Analysis Complete ---"
-            st.session_state.results_p3 = {'final_df': final_df, 'visuals': visuals}
-    st.text_area("Status & Logs", st.session_state.log_p3, height=200)
-    if st.session_state.results_p3:
         res3 = st.session_state.results_p3
         st.subheader("Lead Compound Analysis & Toxicity Prediction")
         st.dataframe(res3.get('final_df', pd.DataFrame()), use_container_width=True, hide_index=True)
         st.subheader("2D & 3D Molecular Structures")
-        for name, (svg_2d, viewer_3d, log) in res3.get('visuals', {}).items():
-            st.markdown(f"#### {name}")
-            col1, col2 = st.columns(2)
-            with col1:
-                st.markdown("##### 2D Structure")
-                if svg_2d:
-                    st.image(svg_2d)
-            with col2:
-                st.markdown("##### 3D Structure")
-                if viewer_3d:
-                    showmol(viewer_3d, height=300, width=400)
 # --- Phase 4: Pre-clinical & RWE ---
 with tab4:
     st.header("Phase 4: Simulated Pre-clinical & Real-World Evidence (RWE)")
     rwd_input = st.text_area(
         "Enter simulated adverse event report text:",
-        "Patient reports include instances of headache, severe nausea, and occasional skin rash.", height=150
     )
     if st.button("🚀 Run Phase 4 Analysis", key="run_p4"):
-        with st.spinner("Running Phase 4..."):
             full_log = "--- Phase 4 Analysis Started ---\n"
             reg_df, log_reg = get_regulatory_summary()
             full_log += log_reg
             eth_df, log_eth = get_ethical_framework()
             full_log += log_eth
             rwd_df, plot_bar, log_rwd = simulate_rwd_analysis(rwd_input)
             full_log += log_rwd
-            st.session_state.log_p4 = full_log + "\n--- Phase 4 Analysis Complete ---"
-            st.session_state.results_p4 = {'rwd_df': rwd_df, 'plot_bar': plot_bar, 'reg_df': reg_df, 'eth_df': eth_df}
-    st.text_area("Status & Logs", st.session_state.log_p4, height=200)
-    if st.session_state.results_p4:
         res4 = st.session_state.results_p4
         p4_tabs = st.tabs(["Pharmacovigilance Analysis", "Regulatory & Ethical Frameworks"])
         with p4_tabs[0]:
             if res4.get('plot_bar'):
                 st.bokeh_chart(res4['plot_bar'], use_container_width=True)
             st.dataframe(res4.get('rwd_df', pd.DataFrame()), use_container_width=True, hide_index=True)
         with p4_tabs[1]:
             st.subheader("AI/ML Model Regulatory Summary")
             st.dataframe(res4.get('reg_df', pd.DataFrame()), use_container_width=True, hide_index=True)
             st.subheader("Ethical AI Framework")
-            st.dataframe(res4.get('eth_df', pd.DataFrame()), use_container_width=True, hide_index=True)

 # 3D Visualization
 import py3Dmol
 # Bokeh plotting
 from bokeh.plotting import figure
         log += f"❌ An error occurred while fetching FASTA data: {e}\n"
         return log
+def visualize_protein_3d(pdb_data: str, title="Protein 3D Structure"):
     """
     Generates an interactive 3D protein visualization using py3Dmol.
     """
     if not pdb_data:
         return None, "Cannot generate 3D view: No PDB data provided."
     try:
+        viewer = py3Dmol.view(width='100%', height=600)
         viewer.setBackgroundColor('#1C1C1C')
         viewer.addModel(pdb_data, "pdb")
         viewer.setStyle({'cartoon': {'color': 'spectrum', 'thickness': 0.8}})
         viewer.addSurface(py3Dmol.VDW, {'opacity': 0.3, 'color': 'lightblue'})
         viewer.zoomTo()
+        html = viewer._make_html()
+        log = f"✅ Generated 3D visualization for {title}."
+        return html, log
     except Exception as e:
         return None, f"❌ 3D visualization error: {e}"
 def create_sample_molecules():
     """
     Returns a dictionary of sample molecules in Name:SMILES format.
+    Expanded list for more comprehensive demonstration.
     """
     return {
         "Oseltamivir (Influenza)": "CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C",
         "Atorvastatin (Cholesterol)": "CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1", # Lipitor
         "Metformin (Diabetes)": "CN(C)C(=N)N=C(N)N",
         "Loratadine (Antihistamine)": "CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1",
+        "Imatinib (Gleevec - Cancer)": "Cc1ccc(NC(=O)c2cnc(C)s2)cc1-c1cnc(Nc2ccc(CN)cc2)nc1", # Complex structure, tyrosine kinase inhibitor
+        "Amlodipine (Hypertension)": "CCC(COC(=O)c1cnc(C)c(c1C)C(=O)OC)c1ccc(Cl)cc1", # Calcium channel blocker
+        "Rosuvastatin (Cholesterol)": "CC(C)c1ccc(cc1)S(=O)(=O)Nc1ncc(C)c(C(=O)O[C@H](C)[C@H](O)CC(=O)O)c1C", # Statin
     }
 def calculate_molecular_properties(smiles_dict: dict):
         mol = Chem.MolFromSmiles(smiles)
         if mol:
             props = {
+                'Molecule': name,
+                'SMILES': smiles,
+                'MW': Descriptors.MolWt(mol),
+                'LogP': Descriptors.MolLogP(mol),
+                'HBD': Descriptors.NumHDonors(mol),
+                'HBA': Descriptors.NumHAcceptors(mol),
+                'TPSA': Descriptors.TPSA(mol),
                 'RotBonds': Descriptors.NumRotatableBonds(mol),
             }
             properties.append(props)
 def assess_drug_likeness(df: pd.DataFrame):
     """
     Assesses drug-likeness based on Lipinski's Rule of Five.
+    This version returns a boolean for plotting and a formatted string for display.
     """
     if df.empty:
         return pd.DataFrame(), pd.DataFrame(), "Cannot assess drug-likeness: No properties data."
     analysis_df['HBD_OK'] = analysis_df['HBD'] <= 5
     analysis_df['HBA_OK'] = analysis_df['HBA'] <= 10
     analysis_df['Lipinski_Violations'] = (~analysis_df[['MW_OK', 'LogP_OK', 'HBD_OK', 'HBA_OK']]).sum(axis=1)
     analysis_df['Drug_Like'] = analysis_df['Lipinski_Violations'] <= 1
     display_df = df.copy()
     display_df['Drug_Like'] = analysis_df['Drug_Like'].apply(lambda x: '✅ Yes' if x else '❌ No')
     log = "✅ Assessed drug-likeness using Lipinski's Rule of Five.\n"
     return analysis_df, display_df, log
 def plot_properties_dashboard(df: pd.DataFrame):
+    """Creates a professional 2x2 dashboard of molecular property visualizations using Bokeh."""
+    from math import pi, cos, sin
     if df.empty or 'Drug_Like' not in df.columns:
         return None, "Cannot plot: No analysis data or 'Drug_Like' column missing."
     ])
     plot_config = {
+        'sizing_mode': 'scale_width', 'aspect_ratio': 1,
+        'background_fill_color': None, 'border_fill_color': None,
+        'outline_line_color': '#333333', 'min_border_left': 50,
+        'min_border_right': 50, 'min_border_top': 50, 'min_border_bottom': 50
     }
     def style_plot(p, x_label, y_label, title):
+        """Apply consistent professional styling to plots."""
+        p.title.text = title
+        p.title.text_color = '#FFFFFF'
+        p.title.text_font_size = '14pt'
+        p.title.text_font_style = 'bold'
+        p.xaxis.axis_label = x_label
+        p.yaxis.axis_label = y_label
+        p.axis.axis_label_text_color = '#CCCCCC'
+        p.axis.axis_label_text_font_size = '11pt'
+        p.axis.major_label_text_color = '#AAAAAA'
+        p.axis.major_label_text_font_size = '10pt'
+        p.grid.grid_line_color = '#2A2A2A'
+        p.grid.grid_line_alpha = 0.3
         if p.legend:
+            p.legend.location = "top_right"
+            p.legend.background_fill_color = '#1A1A1A'
+            p.legend.background_fill_alpha = 0.8
+            p.legend.border_line_color = '#444444'
+            p.legend.label_text_color = '#FFFFFF'
+            p.legend.click_policy = "mute"
         return p
+    p1 = figure(title="Molecular Weight vs LogP", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
+    p1.scatter('MW', 'LogP', source=source, legend_group='Category',
+               color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
+    p1.line([500, 500], [df['LogP'].min()-0.5, df['LogP'].max()+0.5], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="MW ≤ 500")
+    p1.line([df['MW'].min()-50, df['MW'].max()+50], [5, 5], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="LogP ≤ 5")
     style_plot(p1, "Molecular Weight (Da)", "LogP", "Lipinski Rule: MW vs LogP")
+    p2 = figure(title="Hydrogen Bonding Profile", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
+    p2.scatter('HBD', 'HBA', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
+    p2.line([5, 5], [df['HBA'].min()-1, df['HBA'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="HBD ≤ 5")
+    p2.line([df['HBD'].min()-1, df['HBD'].max()+1], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="HBA ≤ 10")
     style_plot(p2, "Hydrogen Bond Donors", "Hydrogen Bond Acceptors", "Lipinski Rule: Hydrogen Bonding")
+    p3 = figure(title="Molecular Flexibility & Polarity", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
+    p3.scatter('TPSA', 'RotBonds', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
+    p3.line([140, 140], [df['RotBonds'].min()-1, df['RotBonds'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="TPSA ≤ 140")
+    p3.line([df['TPSA'].min()-10, df['TPSA'].max()+10], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="RotBonds ≤ 10")
     style_plot(p3, "Topological Polar Surface Area (Å²)", "Rotatable Bonds", "Drug Permeability Indicators")
     p4_config = plot_config.copy()
+    p4_config['tools'] = "hover"
+    p4_config.update({'x_range': (-1.0, 1.0), 'y_range': (-1.0, 1.0)})
     p4 = figure(title="Drug-Likeness Distribution", **p4_config)
+    # Calculate percentages for the doughnut chart
     counts = df['Category'].value_counts()
+    data = pd.DataFrame({'category': counts.index, 'value': counts.values})
     data['angle'] = data['value']/data['value'].sum() * 2*pi
+    data['color'] = [colors[0] if cat == 'Drug-Like' else colors[1] for cat in counts.index]
     data['percentage'] = (data['value'] / data['value'].sum() * 100).round(1)
+    # Calculate overall drug-like percentage for central text
+    total_compounds = len(df)
+    drug_like_count = df['Drug_Like'].sum()
+    drug_like_percentage = (drug_like_count / total_compounds * 100) if total_compounds > 0 else 0
     wedge_renderer = p4.annular_wedge(x=0, y=0, inner_radius=0.25, outer_radius=0.45,
                      start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
+                     line_color="white", line_width=3, fill_color='color',
+                     legend_field='category', source=data)
+    # Updated HoverTool to display percentage
+    donut_hover = HoverTool(tooltips=[
+        ("Category", "@category"),
+        ("Count", "@value"),
+        ("Percentage", "@percentage{%0.1f}%%") # Display percentage with one decimal place
+    ], renderers=[wedge_renderer])
+    p4.add_tools(donut_hover)
+    # Updated central text to show Drug-Like percentage
+    p4.text([0], [0], text=[f"{total_compounds}\nCompounds\n({drug_like_percentage:.1f}% Drug-Like)"],
             text_align="center", text_baseline="middle", text_color="white", text_font_size="10pt", text_font_style="bold")
     style_plot(p4, "", "", "Compound Classification")
 # ===== Phase 2 Functions =====
 def get_phase2_molecules():
     """
     Return the Phase 2 virtual-screening library as a ``{display name: SMILES}`` dict.
     The keys are well-known drugs from diverse therapeutic areas. The entries for
     Loratadine, Cetirizine, Atorvastatin, Ciprofloxacin, Warfarin, Furosemide,
     Sildenafil, Omeprazole and Losartan previously encoded structures that were not
     the named compound (e.g. an Omeprazole entry with no sulfur atom); they now use
     the structures published for those drugs. Keys and ordering are unchanged.
     """
     return {
         'Paracetamol (Analgesic)': 'CC(=O)Nc1ccc(O)cc1',
         'Ibuprofen (Pain/Inflammation)': 'CC(C)Cc1ccc(C(C)C(=O)O)cc1',
         'Aspirin (Pain/Antiplatelet)': 'CC(=O)Oc1ccccc1C(=O)O',
         'Naproxen (Pain/Inflammation)': 'C[C@H](C(=O)O)c1ccc2cc(OC)ccc2c1',
         'Diazepam (Anxiolytic)': 'CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc12',
         'Metformin (Diabetes)': 'CN(C)C(=N)N=C(N)N',
         'Loratadine (Antihistamine)': 'CCOC(=O)N1CCC(=C2C3=C(CCC4=C2N=CC=C4)C=C(C=C3)Cl)CC1',
         'Morphine (Opioid Analgesic)': 'C[N@]1CC[C@]23c4c5ccc(O)c4O[C@H]2[C@@H](O)C=C[C@H]3[C@H]1C5',
         'Cetirizine (Antihistamine)': 'C1CN(CCN1CCOCC(=O)O)C(C2=CC=CC=C2)C3=CC=C(C=C3)Cl',
         'Fluoxetine (Antidepressant)': 'CNCCC(c1ccccc1)Oc1ccc(C(F)(F)F)cc1',
         'Amoxicillin (Antibiotic)': 'C[C@@]1([C@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](N)c3ccc(O)cc3)C(=O)O)C',
         'Atorvastatin (Cholesterol)': 'CC(C)C1=C(C(=C(N1CC[C@H](C[C@H](CC(=O)O)O)O)C2=CC=C(C=C2)F)C3=CC=CC=C3)C(=O)NC4=CC=CC=C4',
         'Ciprofloxacin (Antibiotic)': 'C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)N4CCNCC4)F)C(=O)O',
         'Warfarin (Anticoagulant)': 'CC(=O)CC(C1=CC=CC=C1)C2=C(C3=CC=CC=C3OC2=O)O',
         'Furosemide (Diuretic)': 'C1=COC(=C1)CNC2=CC(=C(C=C2C(=O)O)S(=O)(=O)N)Cl',
         'Sildenafil (Erectile Dysfunction)': 'CCCC1=NN(C2=C1N=C(NC2=O)C3=C(C=CC(=C3)S(=O)(=O)N4CCN(CC4)C)OCC)C',
         'Omeprazole (GERD)': 'CC1=CN=C(C(=C1OC)C)CS(=O)C2=NC3=C(N2)C=C(C=C3)OC', # Proton pump inhibitor
         'Losartan (Hypertension)': 'CCCCC1=NC(=C(N1CC2=CC=C(C=C2)C3=CC=CC=C3C4=NNN=N4)CO)Cl', # Angiotensin Receptor Blocker
     }
 def simulate_virtual_screening(smiles_dict: dict):
     np.random.seed(42)
     scores = np.random.uniform(2.0, 9.8, len(smiles_dict))
     log += f"✅ Predicted ADMET properties for {len(df)} molecules.\n"
     return df, log
+def visualize_molecule_2d_3d(smiles: str, name: str):
+    """Generates a side-by-side 2D SVG and 3D py3Dmol HTML view for a single molecule."""
+    log = ""
     try:
         mol = Chem.MolFromSmiles(smiles)
+        if not mol: return f"<p>Invalid SMILES for {name}</p>", f"❌ Invalid SMILES for {name}"
         drawer = Draw.rdMolDraw2D.MolDraw2DSVG(400, 300)
+        # Set dark theme colors for 2D drawing
         drawer.drawOptions().clearBackground = False
+        drawer.drawOptions().addStereoAnnotation = True
+        drawer.drawOptions().baseFontSize = 0.8
+        drawer.drawOptions().circleAtoms = False
+        drawer.drawOptions().highlightColour = (1, 0.5, 0)  # Orange for highlights
+        # Set colors for dark background visibility
+        drawer.drawOptions().backgroundColour = (0.11, 0.11, 0.11)  # Dark background
+        drawer.drawOptions().symbolColour = (1, 1, 1)  # White symbols
+        drawer.drawOptions().defaultColour = (1, 1, 1)  # White default color
+        # Try to set annotation color (this might help with (R)/(S) labels)
+        try:
+            drawer.drawOptions().annotationColour = (1, 1, 1)  # White annotations
+        except:
+            pass
         drawer.DrawMolecule(mol)
         drawer.FinishDrawing()
         svg_2d = drawer.GetDrawingText().replace('svg:', '')
+        # More aggressive SVG text color fixes - target all possible black text variations
+        # First, comprehensive string replacements
+        svg_2d = svg_2d.replace('stroke="black"', 'stroke="white"')
+        svg_2d = svg_2d.replace('fill="black"', 'fill="white"')
+        svg_2d = svg_2d.replace('stroke="#000000"', 'stroke="#FFFFFF"')
+        svg_2d = svg_2d.replace('fill="#000000"', 'fill="#FFFFFF"')
+        svg_2d = svg_2d.replace('stroke="#000"', 'stroke="#FFF"')
+        svg_2d = svg_2d.replace('fill="#000"', 'fill="#FFF"')
+        svg_2d = svg_2d.replace('stroke:black', 'stroke:white')
+        svg_2d = svg_2d.replace('fill:black', 'fill:white')
+        svg_2d = svg_2d.replace('stroke:#000000', 'stroke:#FFFFFF')
+        svg_2d = svg_2d.replace('fill:#000000', 'fill:#FFFFFF')
+        svg_2d = svg_2d.replace('stroke:#000', 'stroke:#FFF')
+        svg_2d = svg_2d.replace('fill:#000', 'fill="#FFF"')
+        svg_2d = svg_2d.replace('stroke="rgb(0,0,0)"', 'stroke="rgb(255,255,255)"')
+        svg_2d = svg_2d.replace('fill="rgb(0,0,0)"', 'fill="rgb(255,255,255)"')
+        svg_2d = svg_2d.replace('stroke:rgb(0,0,0)', 'stroke:rgb(255,255,255)')
+        svg_2d = svg_2d.replace('fill:rgb(0,0,0)', 'fill:rgb(255,255,255)')
+        svg_2d = svg_2d.replace('color="black"', 'color="white"')
+        svg_2d = svg_2d.replace('color:#000000', 'color:#FFFFFF')
+        svg_2d = svg_2d.replace('color:#000', 'color:#FFF')
+        # Aggressive regex-based fixes for all text elements
+        # Remove any existing fill attributes from text elements and add white fill
+        svg_2d = re.sub(r'<text([^>]*?)\s+fill="[^"]*"([^>]*?)>', r'<text\1\2 fill="white">', svg_2d)
+        svg_2d = re.sub(r'<text([^>]*?)(?<!fill="white")>', r'<text\1 fill="white">', svg_2d)
+        # Fix style attributes in text elements
+        svg_2d = re.sub(r'<text([^>]*?)style="([^"]*?)fill:\s*(?:black|#000000|#000|rgb\(0,0,0\))([^"]*?)"([^>]*?)>',
+                       r'<text\1style="\2fill:white\3"\4>', svg_2d)
+        # If text elements don't have any fill specified, ensure they get white
+        svg_2d = re.sub(r'<text(?![^>]*fill=)([^>]*?)>', r'<text fill="white"\1>', svg_2d)
+        # Clean up any duplicate fill attributes
+        svg_2d = re.sub(r'fill="white"\s+fill="white"', 'fill="white"', svg_2d)
+        # Final catch-all: replace any remaining black in the entire SVG
+        svg_2d = re.sub(r'\bblack\b', 'white', svg_2d)
+        svg_2d = re.sub(r'#000000', '#FFFFFF', svg_2d)
+        svg_2d = re.sub(r'#000\b', '#FFF', svg_2d)
+        svg_2d = re.sub(r'rgb\(0,\s*0,\s*0\)', 'rgb(255,255,255)', svg_2d)
+        # Embed the SVG within a div with a dark background for consistency
+        svg_2d = f'<div style="background-color: #1C1C1C; padding: 10px; border-radius: 5px;">{svg_2d}</div>'
         mol_3d = Chem.AddHs(mol)
         AllChem.EmbedMolecule(mol_3d, randomSeed=42)
         AllChem.MMFFOptimizeMolecule(mol_3d)
         viewer.addModel(sdf_data, "sdf")
         viewer.setStyle({'stick': {}, 'sphere': {'scale': 0.25}})
         viewer.zoomTo()
+        html_3d = viewer._make_html()
+        combined_html = f"""
+        <div style="display: flex; flex-direction: row; align-items: center; justify-content: space-around; border: 1px solid #444; border-radius: 10px; padding: 10px; margin-bottom: 10px; background-color: #2b2b2b;">
+            <div style="text-align: center;">
+                <h4 style="color: white; font-family: 'Roboto', sans-serif;">{name} (2D Structure)</h4>
+                {svg_2d}
+            </div>
+            <div style="text-align: center;">
+                <h4 style="color: white; font-family: 'Roboto', sans-serif;">{name} (3D Interactive)</h4>
+                {html_3d}
+            </div>
+        </div>
+        """
+        log += f"✅ Generated 2D/3D view for {name}.\n"
+        return combined_html, log
     except Exception as e:
+        return f"<p>Error visualizing {name}: {e}</p>", f"❌ Error visualizing {name}: {e}"
 def visualize_protein_ligand_interaction(pdb_data: str, pdb_id: str, ligand_resn: str):
     """
     Generates a protein-ligand interaction visualization using py3Dmol.
     Args:
         pdb_data: PDB-format text of the structure; a falsy value short-circuits.
         pdb_id: Identifier of the structure, used only in the log message.
         ligand_resn: Residue name of the ligand to highlight. When falsy, no ligand
             styling or surface is added and the view zooms to the whole model.
     Returns:
         A ``(html, log)`` tuple. ``html`` is the viewer markup, or ``None`` when no
         PDB data was given or the viewer raised.
     """
     if not pdb_data:
         return None, "Cannot generate interaction view: No PDB data provided."
     try:
         viewer = py3Dmol.view(width='100%', height=650)
         viewer.setBackgroundColor('#1C1C1C')
         # Add the protein structure and draw it as a cartoon
         viewer.addModel(pdb_data, "pdb")
         viewer.setStyle({'cartoon': {'color': 'lightblue', 'opacity': 0.8}})
         if ligand_resn:
             ligand_selection = {'resn': ligand_resn}
             # Highlight the ligand as sticks plus small spheres
             viewer.addStyle(ligand_selection, {'stick': {'colorscheme': 'greenCarbon', 'radius': 0.2}})
             viewer.addStyle(ligand_selection, {'sphere': {'scale': 0.3, 'colorscheme': 'greenCarbon'}})
             # The surface is scoped to the ligand, so it is only added when a ligand
             # is named; an empty residue name must not be sent as a selection.
             viewer.addSurface(py3Dmol.VDW, {'opacity': 0.2, 'color': 'white'}, ligand_selection)
             viewer.zoomTo(ligand_selection)
         else:
             viewer.zoomTo({})
         html = viewer._make_html()
         log = f"✅ Generated protein-ligand interaction view for {pdb_id} with ligand {ligand_resn}."
         return html, log
     except Exception as e:
         return None, f"❌ Interaction visualization error: {e}"
 # ===== Phase 3 Functions =====
 def get_phase3_molecules():
     """
     Return the lead compounds offered for optimization as a ``{display name: SMILES}`` dict.
     The entries are marketed drugs or advanced candidates. The Oseltamivir, Imatinib,
     Sorafenib and Dasatinib entries previously encoded structures that were not the
     named compound (e.g. a Sorafenib entry with no trifluoromethyl group or urea);
     they now use the structures published for those drugs. Keys and ordering are
     unchanged.
     """
     return {
         'Oseltamivir (Influenza)': 'CCC(CC)O[C@@H]1C=C(C[C@@H]([C@H]1NC(=O)C)N)C(=O)OCC',
         'Aspirin (Pain/Antiplatelet)': 'CC(=O)OC1=CC=CC=C1C(=O)O',
         'Remdesivir (Antiviral)': 'CCC(CC)COC(=O)[C@@H](C)N[P@](=O)(OC[C@@H]1O[C@](C#N)([C@H]([C@@H]1O)O)C2=CC=C3N2N=CN=C3N)OC4=CC=CC=C4',
         'Penicillin G (Antibiotic)': 'CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C',
         "Imatinib (Gleevec - Cancer)": "CC1=C(C=C(C=C1)NC(=O)C2=CC=C(C=C2)CN3CCN(CC3)C)NC4=NC=CC(=N4)C5=CN=CC=C5",
         "Sorafenib (Kinase Inhibitor)": "CNC(=O)C1=NC=CC(=C1)OC2=CC=C(C=C2)NC(=O)NC3=CC(=C(C=C3)Cl)C(F)(F)F", # Multi-kinase inhibitor for cancer
         "Venetoclax (BCL-2 Inhibitor)": "CC1(CCC(=C(C1)C2=CC=C(C=C2)Cl)CN3CCN(CC3)C4=CC(=C(C=C4)C(=O)NS(=O)(=O)C5=CC(=C(C=C5)NCC6CCOCC6)[N+](=O)[O-])OC7=CN=C8C(=C7)C=CN8)C",
         "Dasatinib (Kinase Inhibitor)": "CC1=C(C(=CC=C1)Cl)NC(=O)C2=CN=C(S2)NC3=CC(=NC(=N3)C)N4CCN(CC4)CCO", # Multi-kinase inhibitor for leukemia
     }
 def calculate_comprehensive_properties(smiles_dict: dict):
 # ===== Phase 4 Functions =====
 def get_regulatory_summary():
     """Return the AI/ML documentation table (one row per component) and a status message."""
     rows = [
         ('Data Governance',
          'Data sourced from ChEMBL, PDB, GISAID. Bias assessed via geographic distribution analysis.'),
         ('Model Architecture',
          'Graph Convolutional Network (Target ID), Random Forest (ADMET), K-Means (Patient Stratification).'),
         ('Model Validation',
          'ADMET Model validated with AUC-ROC > 0.85 on an independent test set.'),
         ('Interpretability',
          'SHAP used for patient stratification model outputs.'),
     ]
     table = pd.DataFrame(rows, columns=['Component', 'Description'])
     return table, "✅ Generated AI/ML documentation summary."
 def simulate_rwd_analysis(adverse_event_text):
     """
     Tally adverse-event terms and chart the ten most frequent.
     A fixed-seed sample of 100 background events is combined with every alphabetic
     word of three or more letters found in ``adverse_event_text`` (lower-cased).
     Returns ``(results_df, plot, log)``: the top terms with their counts, a
     horizontal Bokeh bar chart of them, and a one-line status message.
     """
     np.random.seed(42)
     event_names = ['headache', 'nausea', 'fatigue', 'dizziness', 'rash', 'fever', 'diarrhea', 'constipation', 'insomnia', 'muscle pain']
     event_weights = [0.2, 0.15, 0.12, 0.12, 0.1, 0.08, 0.08, 0.05, 0.05, 0.05]
     base_events = list(np.random.choice(event_names, 100, p=event_weights))
     user_terms = [token.lower() for token in re.findall(r'\b[a-zA-Z]{3,}\b', adverse_event_text)]
     all_events = base_events + user_terms
     events_df = pd.DataFrame(all_events, columns=['Adverse_Event'])
     frequencies = events_df['Adverse_Event'].value_counts()
     event_counts = frequencies.nlargest(10).sort_values(ascending=False)
     results_df = event_counts.reset_index()
     results_df.columns = ['Adverse_Event', 'Frequency']
     log = f"✅ Analyzed {len(all_events)} total event reports. Identified {len(event_counts)} unique adverse events for plotting.\n"
     # Build the bar chart; the category axis is reversed so the most frequent event is drawn at the top.
     source = ColumnDataSource(results_df)
     categories = list(reversed(results_df['Adverse_Event'].tolist()))
     hover = HoverTool(tooltips=[("Event", "@Adverse_Event"),("Frequency", "@Frequency")])
     p = figure(
         y_range=categories,
         height=450,
         title="Top 10 Reported Adverse Events",
         sizing_mode='stretch_width',
         tools="pan,wheel_zoom,box_zoom,reset,save",
     )
     p.add_tools(hover)
     p.hbar(
         y='Adverse_Event',
         right='Frequency',
         source=source,
         height=0.7,
         color='#00A0FF',
         line_color='white',
         legend_label="Event Frequency",
     )
     # Dark-theme styling: canvas first, then title, axes, grid and legend.
     p.background_fill_color = "#1C1C1C"
     p.border_fill_color = "#1C1C1C"
     p.outline_line_color = '#333333'
     heading = p.title
     heading.text_color = "white"
     heading.text_font_size = '16pt'
     heading.align = "center"
     p.xaxis.axis_label = "Frequency Count"
     p.yaxis.axis_label = "Adverse Event"
     axes = p.axis
     axes.axis_label_text_color = "#CCCCCC"
     axes.axis_label_text_font_size = "12pt"
     axes.major_label_text_color = "#AAAAAA"
     axes.major_label_text_font_size = "10pt"
     grid_lines = p.grid
     grid_lines.grid_line_alpha = 0.3
     grid_lines.grid_line_color = "#444444"
     p.x_range.start = 0
     legend = p.legend
     legend.location = "top_right"
     legend.background_fill_color = "#2A2A2A"
     legend.background_fill_alpha = 0.7
     legend.border_line_color = "#444444"
     legend.label_text_color = "white"
     return results_df, p, log
 def get_ethical_framework():
     """Return the ethical-AI principles table (one row per principle) and a status message."""
     rows = [
         ('Beneficence', 'AI models prioritize patient outcomes and clinical efficacy.'),
         ('Non-maleficence', 'Toxicity prediction and pharmacovigilance models aim to minimize patient harm.'),
         ('Fairness', 'Algorithms are audited for demographic bias in training data and predictions.'),
         ('Transparency', 'Model cards and SHAP values are provided for key decision-making processes.'),
     ]
     table = pd.DataFrame(rows, columns=['Principle', 'Implementation Strategy'])
     return table, "✅ Generated Ethical AI Framework summary."
 # --- 3. Streamlit UI Layout ---
+# Initialize session state variables
+if 'active_tab' not in st.session_state: st.session_state.active_tab = "Phase 1: Target Identification"
 if 'log_p1' not in st.session_state: st.session_state.log_p1 = "Status logs will appear here."
 if 'log_p2' not in st.session_state: st.session_state.log_p2 = "Status logs will appear here."
 if 'log_p3' not in st.session_state: st.session_state.log_p3 = "Status logs will appear here."
 if 'results_p3' not in st.session_state: st.session_state.results_p3 = {}
 if 'results_p4' not in st.session_state: st.session_state.results_p4 = {}
+# --- Header ---
 st.title("🔬 AI-Powered Drug Discovery Pipeline")
 st.markdown("An integrated application demonstrating a four-phase computational drug discovery workflow.")
+# --- Main Tabs for Each Phase ---
 tab1, tab2, tab3, tab4 = st.tabs([
+    "**Phase 1:** Target Identification",
+    "**Phase 2:** Hit Discovery & ADMET",
+    "**Phase 3:** Lead Optimization",
+    "**Phase 4:** Pre-clinical & RWE"
 ])
 # --- Phase 1: Target Identification ---
 with tab1:
     # Phase 1 page: choose a target structure/sequence and sample compounds, run the
     # analysis on demand, and render whatever the last run stored in session state.
     st.header("Phase 1: Target Identification & Initial Analysis")
     st.markdown("""
     In this initial phase, we identify and analyze a biological target (e.g., a protein) implicated in a disease.
     We fetch its 3D structure and sequence data, then evaluate a set of initial compounds for their drug-like properties.
     """)
     st.subheader("Inputs & Controls")
     # Display label -> PDB ID
     pdb_options = {
         "Neuraminidase (Influenza - 2HU4)": "2HU4",
         "KRAS G12D (Oncogenic Target - 7XKJ)": "7XKJ", # Bound to MRTX-1133
         "SARS-CoV-2 Mpro (Antiviral Target - 8HUR)": "8HUR", # Bound to Ensitrelvir
         "EGFR Kinase (Cancer Target - 1M17)": "1M17", # Bound to Erlotinib
     }
     selected_pdb_name = st.selectbox("Select PDB ID:", options=list(pdb_options.keys()), index=0)
     pdb_id_input = pdb_options[selected_pdb_name]
     # Display label -> protein accession passed to fetch_fasta_sequence
     protein_options = {
         "Neuraminidase (P03468)": "P03468", # Influenza A virus (A/PR/8/34)
         "KRAS (P01116)": "P01116", # Human KRAS
         "SARS-CoV-2 Main Protease (P0DTD1)": "P0DTD1", # SARS-CoV-2 Mpro
         "EGFR (P00533)": "P00533", # Human Epidermal Growth Factor Receptor
     }
     selected_protein_name = st.selectbox("Select NCBI Protein ID:", options=list(protein_options.keys()), index=0)
     protein_id_input = protein_options[selected_protein_name]
     st.markdown("---")
     st.write("**Analyze Sample Compounds:**")
     sample_molecules = create_sample_molecules()
     preferred_defaults = ["Oseltamivir (Influenza)", "Aspirin (Pain/Inflammation)", "Imatinib (Gleevec - Cancer)"]
     selected_molecules = st.multiselect(
         "Select from known drugs:",
         options=list(sample_molecules.keys()),
         # A default that is not among the options makes the widget raise, so keep
         # only the preferred names that the sample library actually contains.
         default=[drug for drug in preferred_defaults if drug in sample_molecules],
     )
     if st.button("🚀 Run Phase 1 Analysis", key="run_p1"):
         with st.spinner("Fetching data and calculating properties..."):
             full_log = "--- Phase 1 Analysis Started ---\n"
             pdb_data, log_pdb = fetch_pdb_structure(pdb_id_input)
             full_log += log_pdb
             log_fasta = fetch_fasta_sequence(protein_id_input)
             full_log += log_fasta
             smiles_to_analyze = {name: sample_molecules[name] for name in selected_molecules}
             properties_df, log_props = calculate_molecular_properties(smiles_to_analyze)
             full_log += log_props
             analysis_df, display_df, log_likeness = assess_drug_likeness(properties_df)
             full_log += log_likeness
             protein_view_html, log_3d = visualize_protein_3d(pdb_data, title=f"PDB: {pdb_id_input}")
             full_log += log_3d
             dashboard_plot, log_dash = plot_properties_dashboard(analysis_df)
             full_log += log_dash
             full_log += "\n--- Phase 1 Analysis Complete ---"
             st.session_state.log_p1 = full_log
             st.session_state.results_p1 = {
                 # Remember which structure was analysed so the results heading stays
                 # correct after the selector above is changed without re-running.
                 'pdb_id': pdb_id_input,
                 'pdb_data': pdb_data,
                 'protein_view': protein_view_html,
                 'properties_df': display_df,
                 'dashboard': dashboard_plot
             }
     st.text_area("Status & Logs", st.session_state.log_p1, height=200, key="log_p1_area")
     st.subheader("Results")
     # .get() keeps the first render safe even if 'results_p1' was never initialised.
     res1 = st.session_state.get('results_p1')
     if not res1:
         st.info("Click 'Run Phase 1 Analysis' to generate and display results.")
     else:
         p1_tabs = st.tabs(["Protein Structure", "Compound Properties Dashboard"])
         with p1_tabs[0]:
             st.subheader(f"3D Structure for PDB ID: {res1.get('pdb_id', pdb_id_input)}")
             if res1.get('protein_view'):
                 st.components.v1.html(res1['protein_view'], height=600, scrolling=False)
             else:
                 st.warning("Could not display 3D structure. Check PDB ID and logs.")
         with p1_tabs[1]:
             st.subheader("Physicochemical Properties Analysis")
             # The data table is displayed before the dashboard.
             st.dataframe(res1.get('properties_df', pd.DataFrame()), use_container_width=True, hide_index=True)
             if res1.get('dashboard'):
                 st.bokeh_chart(res1['dashboard'], use_container_width=True)
 # --- Phase 2: Hit Discovery & ADMET ---
 with tab2:
     # Phase 2 page: screen the fixed compound library, predict ADMET properties and
     # show the selected protein-ligand complex; results persist in session state.
     st.header("Phase 2: Virtual Screening & Early ADMET")
     st.markdown("""
     This phase simulates a virtual screening process to identify 'hits' from a larger library of compounds.
     We predict their binding affinity to the target and assess their basic ADMET (Absorption, Distribution,
     Metabolism, Excretion, Toxicity) profiles.
     """)
     st.subheader("Inputs & Controls")
     p2_molecules = get_phase2_molecules()
     st.info(f"A library of {len(p2_molecules)} compounds is ready for screening.")
     # Display label -> PDB ID plus the residue name used to select the bound ligand.
     interaction_pdb_options = {
         "Neuraminidase + Oseltamivir (2HU4)": {"pdb": "2HU4", "ligand": "G39"},
         # 7XKJ is listed as KRAS G12D in the Phase 1 selector; the label here said G12C.
         # NOTE(review): ligand residue names M13 and X77 were not verified against the PDB entries - confirm.
         "KRAS G12D + MRTX-1133 (7XKJ)": {"pdb": "7XKJ", "ligand": "M13"},
         "SARS-CoV-2 Mpro + Ensitrelvir (8HUR)": {"pdb": "8HUR", "ligand": "X77"},
         # Erlotinib is deposited in 1M17 under the residue name AQ4 (not ERL).
         "EGFR + Erlotinib (1M17)": {"pdb": "1M17", "ligand": "AQ4"},
     }
     selected_interaction_pdb_name = st.selectbox(
         "Select PDB ID for Interaction:",
         options=list(interaction_pdb_options.keys()),
         index=0 # Default to Neuraminidase
     )
     p2_pdb_id = interaction_pdb_options[selected_interaction_pdb_name]["pdb"]
     p2_ligand_resn = interaction_pdb_options[selected_interaction_pdb_name]["ligand"]
     st.write(f"Selected PDB: `{p2_pdb_id}`, Selected Ligand Residue Name: `{p2_ligand_resn}`")
     if st.button("🚀 Run Phase 2 Analysis", key="run_p2"):
         with st.spinner("Running virtual screening and ADMET predictions..."):
             full_log = "--- Phase 2 Analysis Started ---\n"
             screening_df, log_screen = simulate_virtual_screening(p2_molecules)
             full_log += log_screen
             admet_df, log_admet = predict_admet_properties(p2_molecules)
             full_log += log_admet
             merged_df = pd.merge(screening_df, admet_df, on="Molecule")
             pdb_data, log_pdb_p2 = fetch_pdb_structure(p2_pdb_id)
             full_log += log_pdb_p2
             interaction_view, log_interact = visualize_protein_ligand_interaction(pdb_data, p2_pdb_id, p2_ligand_resn)
             full_log += log_interact
             full_log += "\n--- Phase 2 Analysis Complete ---"
             st.session_state.log_p2 = full_log
             st.session_state.results_p2 = {
                 # Remember which complex was rendered so the results heading stays
                 # correct after the selector above is changed without re-running.
                 'pdb_id': p2_pdb_id,
                 'ligand_resn': p2_ligand_resn,
                 'merged_df': merged_df,
                 'interaction_view': interaction_view
             }
     st.text_area("Status & Logs", st.session_state.log_p2, height=200, key="log_p2_area")
     st.subheader("Results")
     # .get() keeps the first render safe even if 'results_p2' was never initialised.
     res2 = st.session_state.get('results_p2')
     if not res2:
         st.info("Click 'Run Phase 2 Analysis' to generate and display results.")
     else:
         p2_tabs = st.tabs(["Screening & ADMET Results", "Protein-Ligand Interaction"])
         with p2_tabs[0]:
             st.subheader("Virtual Screening & Early ADMET Predictions")
             st.dataframe(res2.get('merged_df', pd.DataFrame()), use_container_width=True, hide_index=True)
         with p2_tabs[1]:
             shown_pdb_id = res2.get('pdb_id', p2_pdb_id)
             shown_ligand = res2.get('ligand_resn', p2_ligand_resn)
             st.subheader(f"Simulated Interaction for PDB {shown_pdb_id} with Ligand {shown_ligand}")
             if res2.get('interaction_view'):
                 st.components.v1.html(res2['interaction_view'], height=700, scrolling=False)
             else:
                 st.warning("Could not display interaction view. Check inputs and logs.")
 # --- Phase 3: Lead Optimization ---
 with tab3:
     # Phase 3 page: pick lead compounds, compute detailed properties and a toxicity
     # prediction for them, and render a 2D/3D panel per compound from the last run.
     st.header("Phase 3: Lead Compound Optimization")
     st.markdown("""
     In lead optimization, promising 'hit' compounds are refined to improve their efficacy and safety.
     Here, we analyze a few selected lead candidates, perform more detailed property calculations,
     and predict their toxicity risk using a simulated machine learning model.
     """)
     st.subheader("Inputs & Controls")
     p3_molecules = get_phase3_molecules()
     selected_leads = st.multiselect(
         "Select lead compounds to optimize:",
         options=list(p3_molecules.keys()),
         default=['Oseltamivir (Influenza)', 'Remdesivir (Antiviral)', 'Imatinib (Gleevec - Cancer)'],
     )
     run_phase3 = st.button("🚀 Run Phase 3 Analysis", key="run_p3")
     if run_phase3:
         with st.spinner("Analyzing lead compounds and predicting toxicity..."):
             log_parts = ["--- Phase 3 Analysis Started ---\n"]
             smiles_to_analyze_p3 = {lead: p3_molecules[lead] for lead in selected_leads}
             comp_props_df, log_comp = calculate_comprehensive_properties(smiles_to_analyze_p3)
             log_parts.append(log_comp)
             toxicity_df, log_tox = predict_toxicity(comp_props_df)
             log_parts.append(log_tox)
             final_df = pd.merge(comp_props_df, toxicity_df, on="Compound")
             # One combined 2D/3D HTML panel per selected lead, in selection order.
             visuals = {}
             for lead_name, lead_smiles in smiles_to_analyze_p3.items():
                 visuals[lead_name], log_vis = visualize_molecule_2d_3d(lead_smiles, lead_name)
                 log_parts.append(log_vis)
             log_parts.append("\n--- Phase 3 Analysis Complete ---")
             st.session_state.log_p3 = "".join(log_parts)
             st.session_state.results_p3 = {'final_df': final_df, 'visuals': visuals}
     st.text_area("Status & Logs", st.session_state.log_p3, height=200, key="log_p3_area")
     st.subheader("Results")
     if st.session_state.results_p3:
         res3 = st.session_state.results_p3
         st.subheader("Lead Compound Analysis & Toxicity Prediction")
         st.dataframe(res3.get('final_df', pd.DataFrame()), use_container_width=True, hide_index=True)
         st.subheader("2D & 3D Molecular Structures")
         for visual_html in res3.get('visuals', {}).values():
             st.components.v1.html(visual_html, height=430, scrolling=False)
     else:
         st.info("Click 'Run Phase 3 Analysis' to generate and display results.")
 # --- Phase 4: Pre-clinical & RWE ---
 with tab4:
     # Phase 4 page: analyse free-text adverse-event reports and show the regulatory
     # and ethical summary tables; results persist in session state between reruns.
     st.header("Phase 4: Simulated Pre-clinical & Real-World Evidence (RWE)")
     st.markdown("""
     This final phase simulates post-market analysis. We analyze text data for adverse events (pharmacovigilance)
     and present documentation related to the AI models and ethical frameworks that would be required for regulatory submission.
     """)
     st.subheader("Inputs & Controls")
     rwd_input = st.text_area(
         "Enter simulated adverse event report text:",
         "Patient reports include instances of headache, severe nausea, and occasional skin rash. Some noted dizziness after taking the medication.",
         height=150
     )
     if st.button("🚀 Run Phase 4 Analysis", key="run_p4"):
         with st.spinner("Analyzing real-world data and generating reports..."):
             full_log = "--- Phase 4 Analysis Started ---\n"
             reg_df, log_reg = get_regulatory_summary()
             full_log += log_reg
             eth_df, log_eth = get_ethical_framework()
             full_log += log_eth
             rwd_df, plot_bar, log_rwd = simulate_rwd_analysis(rwd_input)
             full_log += log_rwd
             full_log += "\n--- Phase 4 Analysis Complete ---"
             st.session_state.log_p4 = full_log
             st.session_state.results_p4 = {
                 'rwd_df': rwd_df,
                 'plot_bar': plot_bar,
                 'reg_df': reg_df,
                 'eth_df': eth_df
             }
     # 'log_p4' is only written by the run above, so fall back to the same placeholder
     # the other phases use instead of failing on the first render.
     st.text_area("Status & Logs", st.session_state.get('log_p4', "Status logs will appear here."), height=200, key="log_p4_area")
     st.subheader("Results")
     res4 = st.session_state.get('results_p4')
     if not res4:
         st.info("Click 'Run Phase 4 Analysis' to generate and display results.")
     else:
         p4_tabs = st.tabs(["Pharmacovigilance Analysis", "Regulatory & Ethical Frameworks"])
         with p4_tabs[0]:
             st.subheader("Simulated Adverse Event Analysis")
             if res4.get('plot_bar'):
                 st.bokeh_chart(res4['plot_bar'], use_container_width=True)
             st.dataframe(res4.get('rwd_df', pd.DataFrame()), use_container_width=True, hide_index=True)
         with p4_tabs[1]:
             st.subheader("AI/ML Model Regulatory Summary")
             st.dataframe(res4.get('reg_df', pd.DataFrame()), use_container_width=True, hide_index=True)
             st.subheader("Ethical AI Framework")
             st.dataframe(res4.get('eth_df', pd.DataFrame()), use_container_width=True, hide_index=True)