Spaces:

alidenewade
/

drug-discovery-pipeline

Running

App Files Community

alidenewade committed on Jun 24

Commit

ed20d10

verified ·

1 Parent(s): 1818804

Update app.py

Browse files

Files changed (1) hide show

app.py +361 -462

app.py CHANGED Viewed

@@ -1,12 +1,8 @@
-# app.py
 # AI-Powered Drug Discovery Pipeline Streamlit Application
 # This script integrates four phases of drug discovery into a single, interactive web app.
 import streamlit as st
 import pandas as pd
 import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sns
 import requests
 import io
 import re
@@ -41,16 +37,15 @@ st.set_page_config(
     page_title="AI Drug Discovery Pipeline",
     page_icon="🔬",
     layout="wide",
-    initial_sidebar_state="collapsed", # Sidebar is removed, but this is good practice
 )
-# Custom CSS for a professional, minimalist look
 def apply_custom_styling():
     st.markdown(
         """
         <style>
         @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
         html, body, [class*="st-"] {
             font-family: 'Roboto', sans-serif;
         }
@@ -92,6 +87,11 @@ def apply_custom_styling():
             color: #FFF;
             background-color: #00A0FF;
         }
         </style>
         """,
         unsafe_allow_html=True
@@ -189,7 +189,7 @@ def calculate_molecular_properties(smiles_dict: dict):
         mol = Chem.MolFromSmiles(smiles)
         if mol:
             props = {
-                'Molecule': name, # Use the provided name
                 'SMILES': smiles,
                 'MW': Descriptors.MolWt(mol),
                 'LogP': Descriptors.MolLogP(mol),
@@ -214,7 +214,6 @@ def assess_drug_likeness(df: pd.DataFrame):
     if df.empty:
         return pd.DataFrame(), pd.DataFrame(), "Cannot assess drug-likeness: No properties data."
-    # Create a copy for analysis to avoid modifying the original dataframe
     analysis_df = df.copy()
     analysis_df['MW_OK'] = analysis_df['MW'] <= 500
     analysis_df['LogP_OK'] = analysis_df['LogP'] <= 5
@@ -222,17 +221,14 @@ def assess_drug_likeness(df: pd.DataFrame):
     analysis_df['HBA_OK'] = analysis_df['HBA'] <= 10
     analysis_df['Lipinski_Violations'] = (~analysis_df[['MW_OK', 'LogP_OK', 'HBD_OK', 'HBA_OK']]).sum(axis=1)
-    # This boolean column is for the plotting function
     analysis_df['Drug_Like'] = analysis_df['Lipinski_Violations'] <= 1
-    # Create a separate DataFrame for display purposes with emojis
     display_df = df.copy()
     display_df['Lipinski_Violations'] = analysis_df['Lipinski_Violations']
     display_df['Drug_Like'] = analysis_df['Drug_Like'].apply(lambda x: '✅ Yes' if x else '❌ No')
     log = "✅ Assessed drug-likeness using Lipinski's Rule of Five.\n"
-    # Return both the analysis_df (for plotting) and display_df (for tables)
     return analysis_df, display_df, log
@@ -245,36 +241,22 @@ def plot_properties_dashboard(df: pd.DataFrame):
     if df['Drug_Like'].dtype != bool:
         return None, f"Cannot plot: 'Drug_Like' column must be boolean, but it is {df['Drug_Like'].dtype}."
-    # Prepare data
     df['Category'] = df['Drug_Like'].apply(lambda x: 'Drug-Like' if x else 'Non-Drug-Like')
     source = ColumnDataSource(df)
-    # Professional color palette
-    colors = ['#00D4AA', '#FF6B6B']  # Teal for drug-like, coral for non-drug-like
     color_mapper = factor_cmap('Category', palette=colors, factors=["Drug-Like", "Non-Drug-Like"])
-    # Enhanced hover tooltip for scatter plots
     scatter_hover = HoverTool(tooltips=[
-        ("Compound", "@Molecule"),
-        ("MW", "@MW{0.0} Da"),
-        ("LogP", "@LogP{0.00}"),
-        ("HBD", "@HBD"),
-        ("HBA", "@HBA"),
-        ("TPSA", "@TPSA{0.0} Å²"),
-        ("Category", "@Category")
     ])
-    # Common plot configuration - responsive plots with a 1:1 aspect ratio
     plot_config = {
-        'sizing_mode': 'scale_width',
-        'aspect_ratio': 1,  # Enforce a square aspect ratio for the data area
-        'background_fill_color': None,
-        'border_fill_color': None,
-        'outline_line_color': '#333333',
-        'min_border_left': 50,
-        'min_border_right': 50,
-        'min_border_top': 50,
-        'min_border_bottom': 50
     }
     def style_plot(p, x_label, y_label, title):
@@ -300,146 +282,61 @@ def plot_properties_dashboard(df: pd.DataFrame):
             p.legend.background_fill_alpha = 0.8
             p.legend.border_line_color = '#444444'
             p.legend.label_text_color = '#FFFFFF'
-            p.legend.label_text_font_size = '10pt'
             p.legend.click_policy = "mute"
-            p.legend.glyph_height = 15
-            p.legend.spacing = 5
         return p
-    # Plot 1: MW vs LogP with Lipinski guidelines
     p1 = figure(title="Molecular Weight vs LogP", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
     p1.scatter('MW', 'LogP', source=source, legend_group='Category',
                color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
-    # Add Lipinski rule lines
-    p1.line([500, 500], [df['LogP'].min()-0.5, df['LogP'].max()+0.5],
-            line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="MW ≤ 500")
-    p1.line([df['MW'].min()-50, df['MW'].max()+50], [5, 5],
-            line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="LogP ≤ 5")
     style_plot(p1, "Molecular Weight (Da)", "LogP", "Lipinski Rule: MW vs LogP")
-    # Plot 2: HBD vs HBA
     p2 = figure(title="Hydrogen Bonding Profile", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
-    p2.scatter('HBD', 'HBA', source=source, legend_group='Category',
-               color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
-    # Add Lipinski rule lines
-    p2.line([5, 5], [df['HBA'].min()-1, df['HBA'].max()+1],
-            line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="HBD ≤ 5")
-    p2.line([df['HBD'].min()-1, df['HBD'].max()+1], [10, 10],
-            line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="HBA ≤ 10")
     style_plot(p2, "Hydrogen Bond Donors", "Hydrogen Bond Acceptors", "Lipinski Rule: Hydrogen Bonding")
-    # Plot 3: TPSA vs Rotatable Bonds with guidelines
     p3 = figure(title="Molecular Flexibility & Polarity", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
-    p3.scatter('TPSA', 'RotBonds', source=source, legend_group='Category',
-               color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
-    # Add permeability guideline lines
-    p3.line([140, 140], [df['RotBonds'].min()-1, df['RotBonds'].max()+1],
-            line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="TPSA ≤ 140")
-    p3.line([df['TPSA'].min()-10, df['TPSA'].max()+10], [10, 10],
-            line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="RotBonds ≤ 10")
     style_plot(p3, "Topological Polar Surface Area (Å²)", "Rotatable Bonds", "Drug Permeability Indicators")
-    # Plot 4: Enhanced Donut Chart
-    # --- MODIFICATION ---
-    # Configure donut plot separately as it doesn't need all scatter tools
     p4_config = plot_config.copy()
-    p4_config['tools'] = "hover" # Only need hover for the donut
     p4_config.update({'x_range': (-1.0, 1.0), 'y_range': (-1.0, 1.0)})
     p4 = figure(title="Drug-Likeness Distribution", **p4_config)
-    # Calculate percentages and create donut chart
     counts = df['Category'].value_counts()
-    total = counts.sum()
-    data = pd.DataFrame({
-        'category': counts.index,
-        'value': counts.values,
-        'percentage': (counts.values / total * 100), # Keep full precision for hover
-        'angle': counts.values / total * 2 * pi,
-        'color': [colors[0] if cat == 'Drug-Like' else colors[1] for cat in counts.index]
-    })
-    # Calculate start and end angles for each wedge
-    data['start_angle'] = 0
-    data['end_angle'] = 0
-    cumulative_angle = 0
-    for i in range(len(data)):
-        data.iloc[i, data.columns.get_loc('start_angle')] = cumulative_angle
-        cumulative_angle += data.iloc[i]['angle']
-        data.iloc[i, data.columns.get_loc('end_angle')] = cumulative_angle
-    donut_source = ColumnDataSource(data)
-    # Create donut using annular wedges (outer ring) - sized to fit within boundaries
     wedge_renderer = p4.annular_wedge(x=0, y=0, inner_radius=0.25, outer_radius=0.45,
-                     start_angle='start_angle', end_angle='end_angle',
                      line_color="white", line_width=3, fill_color='color',
-                     legend_field='category', source=donut_source)
-    # Add percentage text to each slice
-    for i, row in data.iterrows():
-        # Calculate middle angle for text positioning
-        mid_angle = (row['start_angle'] + row['end_angle']) / 2
-        # Position text at middle radius of the annular wedge
-        text_radius = 0.35
-        x_pos = text_radius * cos(mid_angle)
-        y_pos = text_radius * sin(mid_angle)
-        p4.text([x_pos], [y_pos], text=[f"{row['percentage']:.1f}%"],
-                text_align="center", text_baseline="middle",
-                text_color="white", text_font_size="11pt", text_font_style="bold")
-    # Add center text
-    p4.text([0], [0], text=[f"{len(df)}\nCompounds"],
-            text_align="center", text_baseline="middle",
-            text_color="white", text_font_size="14pt", text_font_style="bold")
-    # --- MODIFICATION ---
-    # Custom hover for donut with detailed info
-    donut_hover = HoverTool(
-        tooltips=[
-            ("Category", "@category"),
-            ("Count", "@value"),
-            ("Percentage", "@percentage{0.1f}%")
-        ],
-        renderers=[wedge_renderer] # Attach hover tool specifically to the wedge glyph
-    )
     p4.add_tools(donut_hover)
     style_plot(p4, "", "", "Compound Classification")
     p4.axis.visible = False
     p4.grid.visible = False
-    # Create responsive grid layout
-    grid = gridplot([[p1, p2], [p3, p4]], sizing_mode='scale_width',
-                   toolbar_location='right', merge_tools=True)
     return grid, "✅ Generated enhanced molecular properties dashboard."
 # ===== Phase 2 Functions =====
 def get_phase2_molecules():
     """Returns an expanded list of common drugs with corrected SMILES."""
     return {
-        'Paracetamol': 'CC(=O)Nc1ccc(O)cc1',
-        'Ibuprofen': 'CC(C)Cc1ccc(C(C)C(=O)O)cc1',
-        'Aspirin': 'CC(=O)Oc1ccccc1C(=O)O',
-        'Naproxen': 'C[C@H](C(=O)O)c1ccc2cc(OC)ccc2c1',
-        'Diazepam': 'CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc12',
-        'Metformin': 'CN(C)C(=N)N=C(N)N',
-        'Loratadine': 'CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1',
-        'Morphine': 'C[N@]1CC[C@]23c4c5ccc(O)c4O[C@H]2[C@@H](O)C=C[C@H]3[C@H]1C5',
-        'Cetirizine': 'O=C(O)COCCOc1ccc(cc1)C(c1ccccc1)N1CCN(CC1)CCO',
-        'Fluoxetine': 'CNCCC(c1ccccc1)Oc1ccc(C(F)(F)F)cc1',
-        'Amoxicillin': 'C[C@@]1([C@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](N)c3ccc(O)cc3)C(=O)O)C',
-        'Atorvastatin': 'CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1',
-        'Ciprofloxacin': 'O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc12',
-        'Warfarin': 'O=C(c1ccccc1)C(c1oc2ccccc2c1=O)C',
         'Furosemide': 'O=C(O)c1cc(Cl)c(NC2CO2)c(c1)S(=O)(=O)N',
     }
@@ -468,8 +365,6 @@ def predict_admet_properties(smiles_dict: dict):
     log += f"✅ Predicted ADMET properties for {len(df)} molecules.\n"
     return df, log
-# --- MODIFIED FUNCTION ---
-# This is the updated function to correctly render 2D molecules on a dark background.
 def visualize_molecule_2d_3d(smiles: str, name: str):
     """Generates a side-by-side 2D SVG and 3D py3Dmol HTML view for a single molecule."""
     log = ""
@@ -575,27 +470,41 @@ def visualize_molecule_2d_3d(smiles: str, name: str):
         return combined_html, log
     except Exception as e:
         return f"<p>Error visualizing {name}: {e}</p>", f"❌ Error visualizing {name}: {e}"
-def visualize_protein_ligand_interaction(pdb_data: str, pdb_id: str, ligand_resn='G39'):
-    """Visualizes a protein-ligand binding site using py3Dmol."""
-    if not pdb_data: return None, "Cannot generate view: No PDB data provided."
     try:
-        viewer = py3Dmol.view(width='100%', height=700)
         viewer.setBackgroundColor('#1C1C1C')
         viewer.addModel(pdb_data, "pdb")
-        viewer.setStyle({'cartoon': {'color': 'spectrum', 'thickness': 0.8}})
-        viewer.addSurface(py3Dmol.VDW, {'opacity': 0.2, 'color': 'lightblue'})
-        viewer.addStyle({'resn': ligand_resn}, {'stick': {'colorscheme': 'greenCarbon', 'radius': 0.3}, 'sphere': {'scale': 0.4, 'colorscheme': 'greenCarbon'}})
-        viewer.addStyle({'within': {'distance': 4, 'sel': {'resn': ligand_resn}}}, {'stick': {'colorscheme': 'orangeCarbon', 'radius': 0.2}})
-        viewer.zoomTo({'resn': ligand_resn})
         html = viewer._make_html()
-        log = (f"✅ Generated protein-ligand interaction view for PDB {pdb_id}.\n"
-               f"🟢 Green: Ligand ({ligand_resn})\n"
-               f"🟠 Orange: Residues within 4Å of ligand\n")
         return html, log
     except Exception as e:
-        return None, f"❌ Protein-ligand visualization error: {e}"
 # ===== Phase 3 Functions =====
 def get_phase3_molecules():
     return {
@@ -654,368 +563,358 @@ def get_regulatory_summary():
     return pd.DataFrame(summary), "✅ Generated AI/ML documentation summary."
 def simulate_rwd_analysis(adverse_event_text):
     np.random.seed(42)
-    base_events = list(np.random.choice(['headache', 'nausea', 'fatigue', 'dizziness', 'rash', 'fever'], 100, p=[0.25, 0.2, 0.15, 0.15, 0.15, 0.1]))
-    user_events = [e.strip().lower() for e in adverse_event_text.split(',') if e.strip()]
-    all_events = base_events + user_events
-    event_counts = pd.Series(all_events).value_counts()
-    log = f"✅ Analyzed {len(all_events)} simulated adverse event reports.\n"
-    plt.style.use('dark_background')
-    fig_bar, ax_bar = plt.subplots(figsize=(10, 6))
-    fig_bar.patch.set_facecolor('none')
-    ax_bar.set_facecolor('none')
-    sns.barplot(x=event_counts.values, y=event_counts.index, palette='viridis', ax=ax_bar, orient='h')
-    ax_bar.set_title('Simulated Adverse Event Frequencies')
-    ax_bar.set_xlabel('Number of Reports')
-    ax_bar.set_ylabel('Adverse Event')
-    plt.tight_layout()
-    return event_counts.reset_index().rename(columns={'index': 'Event', 0: 'Count'}), fig_bar, log
-def get_ethical_framework():
-    framework = {'Pillar': ['1. Beneficence & Non-Maleficence', '2. Justice & Fairness', '3. Transparency & Explainability', '4. Accountability & Governance'],
-                 'Description': ['AI should help patients and do no harm. Requires rigorous validation and safety monitoring.',
-                                 'AI must not create or worsen health disparities. Requires bias detection and mitigation.',
-                                 'Clinical decisions influenced by AI must be understandable. Requires interpretable models.',
-                                 'Clear lines of responsibility for AI systems must be established. Requires human oversight.']}
-    return pd.DataFrame(framework), "✅ Generated ethical framework summary."
-# --- 3. Streamlit Interface Definition ---
-st.title("🔬 AI-Powered Drug Discovery Pipeline")
-st.markdown("""
-Welcome to the AI Drug Discovery Pipeline Demonstrator. This application integrates the four major phases of drug development,
-showcasing how AI and computational tools can accelerate the process from target identification to post-market surveillance.
-Navigate through the tabs below to explore each phase.
-""")
-# Initialize session state for logs and results
-if 'log_p1' not in st.session_state: st.session_state.log_p1 = "Phase 1 logs will appear here."
 if 'results_p1' not in st.session_state: st.session_state.results_p1 = {}
-if 'log_p2' not in st.session_state: st.session_state.log_p2 = "Phase 2 logs will appear here."
 if 'results_p2' not in st.session_state: st.session_state.results_p2 = {}
-if 'log_p3' not in st.session_state: st.session_state.log_p3 = "Phase 3 logs will appear here."
 if 'results_p3' not in st.session_state: st.session_state.results_p3 = {}
-if 'log_p4' not in st.session_state: st.session_state.log_p4 = "Phase 4 logs will appear here."
 if 'results_p4' not in st.session_state: st.session_state.results_p4 = {}
 tab1, tab2, tab3, tab4 = st.tabs([
-    "Phase 1: Discovery & Target ID",
-    "Phase 2: Lead Generation & Optimization",
-    "Phase 3: Preclinical Development",
-    "Phase 4: Implementation & Post-Market"
 ])
-# ===== TAB 1: DISCOVERY & TARGET IDENTIFICATION =====
 with tab1:
-    st.header("🧬 Step 1: Target Identification and Initial Analysis")
-    st.markdown("Fetch protein data from public databases and perform a high-level analysis of potential drug-like molecules.")
-    with st.form(key="phase1_form"):
-        st.subheader("Analysis Controls")
-        col1, col2 = st.columns(2)
-        with col1:
-            pdb_id_input = st.text_input("Enter PDB ID", value="3B7E", key="p1_pdb")
-            protein_id_input = st.text_input("Enter Protein ID (for FASTA)", value="ACF54602.1", key="p1_protein")
-        with col2:
-            default_molecules_p1 = create_sample_molecules()
-            default_molecules_text_p1 = "\n".join([f"{name}:{smiles}" for name, smiles in default_molecules_p1.items()])
-            molecules_input_p1 = st.text_area(
-                "Molecules (Name:SMILES, one per line)",
-                value=default_molecules_text_p1,
-                height=150,
-                key="p1_molecules"
-            )
-        run_phase1_btn = st.form_submit_button("🚀 Run Phase 1 Analysis", use_container_width=True)
-    if run_phase1_btn:
-        full_log = "--- Starting Phase 1 Analysis ---\n"
-        # Parse molecules from the text area
-        smiles_dict_p1 = {}
-        if molecules_input_p1.strip():
-            try:
-                for line in molecules_input_p1.strip().split('\n'):
-                    cleaned_line = line.replace('\xa0', ' ').strip()
-                    if ':' in cleaned_line:
-                        name, smiles = cleaned_line.split(':', 1)
-                        smiles_dict_p1[name.strip()] = smiles.strip()
-                if smiles_dict_p1:
-                    full_log += f"✅ Successfully parsed {len(smiles_dict_p1)} molecules from input.\n"
-                else:
-                    full_log += "⚠️ Could not parse any molecules. Please check the format (e.g., 'Aspirin:CC...').\n"
-            except Exception as e:
-                full_log += f"❌ Error parsing molecules list: {e}\n"
-                smiles_dict_p1 = {}
-        else:
-            full_log += "⚠️ Molecule input is empty. No analysis to perform.\n"
-        if smiles_dict_p1:
-            pdb_data, log_pdb_fetch = fetch_pdb_structure(pdb_id_input)
-            full_log += log_pdb_fetch
-            fasta_log = fetch_fasta_sequence(protein_id_input)
-            full_log += fasta_log
-            protein_view_html, log_3d_viz = visualize_protein_3d(pdb_data, pdb_id_input)
-            full_log += log_3d_viz
-            props_df, log_props = calculate_molecular_properties(smiles_dict_p1)
             full_log += log_props
-            analysis_df, display_df, log_lipinski = assess_drug_likeness(props_df)
-            full_log += log_lipinski
-            props_plot, log_plot = plot_properties_dashboard(analysis_df) # This now calls the Bokeh function
-            full_log += log_plot
-            lipinski_cols = ['Molecule', 'MW', 'LogP', 'HBD', 'HBA', 'Lipinski_Violations', 'Drug_Like']
-            lipinski_subset_df = display_df[lipinski_cols] if not display_df.empty else pd.DataFrame(columns=lipinski_cols)
             st.session_state.results_p1 = {
-                'protein_view_html': protein_view_html,
-                'fasta_log': fasta_log,
-                'lipinski_subset_df': lipinski_subset_df,
-                'props_df': props_df,
-                'props_plot': props_plot
             }
-        else:
-            st.session_state.results_p1 = {}
-        full_log += "\n--- Phase 1 Analysis Complete ---"
-        st.session_state.log_p1 = full_log
     st.text_area("Status & Logs", st.session_state.log_p1, height=200, key="log_p1_area")
-    if st.session_state.results_p1:
         res1 = st.session_state.results_p1
-        p1_tabs = st.tabs(["Analysis Plots", "Molecule Analysis", "Protein Information"])
         with p1_tabs[0]:
-            st.subheader("Molecular Properties Dashboard")
-            if res1.get('props_plot'):
-                # Use st.bokeh_chart for Bokeh figures
-                st.bokeh_chart(res1['props_plot'], use_container_width=True)
             else:
-                st.warning("Could not generate plots. Please check the logs for more details.")
         with p1_tabs[1]:
-            st.subheader("Drug-Likeness Assessment (Lipinski's Rule of Five)")
-            st.dataframe(res1.get('lipinski_subset_df', pd.DataFrame()), use_container_width=True, hide_index=True)
-            st.subheader("Calculated Molecular Properties")
-            st.dataframe(res1.get('props_df', pd.DataFrame()), use_container_width=True, hide_index=True)
-        with p1_tabs[2]:
-            st.subheader("Protein 3D Structure (Interactive)")
-            if res1.get('protein_view_html'):
-                st.components.v1.html(res1['protein_view_html'], height=600, scrolling=False)
-            st.subheader("FASTA Sequence Information")
-            st.text_area("", res1.get('fasta_log', 'No data'), height=200, key="fasta_info_area")
-# ===== TAB 2: LEAD GENERATION & OPTIMIZATION =====
 with tab2:
-    st.header("💊 Step 2: Virtual Screening and ADMET Prediction")
-    st.markdown("Screen candidate molecules against the target, predict their ADMET properties, and visualize the top candidates.")
-    with st.form(key="phase2_form"):
-        st.subheader("Analysis Controls")
-        col1, col2 = st.columns(2)
-        with col1:
-            phase2_pdb_id_input = st.text_input("Enter PDB ID for Interaction View", value="3B7E", key="p2_pdb")
-            phase2_ligand_resn = st.text_input("Ligand Residue Name (in PDB)", value="G39", key="p2_ligand")
-        with col2:
-            default_molecules_dict = get_phase2_molecules()
-            default_molecules_text = "\n".join([f"{name}:{smiles}" for name, smiles in default_molecules_dict.items()])
-            molecules_input = st.text_area(
-                "Molecules (Name:SMILES, one per line)",
-                value=default_molecules_text,
-                height=250,
-                key="p2_molecules"
-            )
-        run_phase2_btn = st.form_submit_button("🚀 Run Phase 2 Analysis", use_container_width=True)
-    if run_phase2_btn:
-        full_log = "--- Starting Phase 2 Analysis ---\n"
-        smiles_dict = {}
-        if molecules_input.strip():
-            try:
-                for line in molecules_input.strip().split('\n'):
-                    cleaned_line = line.replace('\xa0', ' ').strip()
-                    if ':' in cleaned_line:
-                        name, smiles = cleaned_line.split(':', 1)
-                        smiles_dict[name.strip()] = smiles.strip()
-                if smiles_dict:
-                    full_log += f"✅ Successfully parsed {len(smiles_dict)} molecules from input.\n"
-                else:
-                    full_log += "⚠️ Could not parse any molecules. Please check the format (e.g., 'Aspirin:CC(=O)OC1=CC=CC=C1C(=O)O').\n"
-            except Exception as e:
-                full_log += f"❌ Error parsing molecules list: {e}\n"
-                smiles_dict = {}
-        else:
-            full_log += "⚠️ Molecule input is empty. No analysis to perform.\n"
-        if smiles_dict:
-            screening_df, log_screening = simulate_virtual_screening(smiles_dict)
-            full_log += log_screening
-            admet_df, log_admet = predict_admet_properties(smiles_dict)
             full_log += log_admet
-            combined_viz_html = ""
-            log_viz = ""
-            for name, smiles in smiles_dict.items():
-                html_block, log_mol_viz = visualize_molecule_2d_3d(smiles, name)
-                combined_viz_html += html_block
-                log_viz += log_mol_viz
-            full_log += log_viz
-            pdb_data, log_pdb_fetch_2 = fetch_pdb_structure(phase2_pdb_id_input)
-            full_log += log_pdb_fetch_2
-            interaction_html, log_interaction = visualize_protein_ligand_interaction(pdb_data, phase2_pdb_id_input, phase2_ligand_resn)
-            full_log += log_interaction
             st.session_state.results_p2 = {
-                'screening_df': screening_df,
-                'admet_df': admet_df,
-                'combined_viz_html': combined_viz_html,
-                'interaction_html': interaction_html,
-                'molecules_used': smiles_dict
             }
-        else:
-            st.session_state.results_p2 = {}
-        full_log += "\n--- Phase 2 Analysis Complete ---"
-        st.session_state.log_p2 = full_log
     st.text_area("Status & Logs", st.session_state.log_p2, height=200, key="log_p2_area")
-    if st.session_state.results_p2:
         res2 = st.session_state.results_p2
-        p2_tabs = st.tabs(["Virtual Screening & ADMET", "Molecule Visualization (2D & 3D)", "Protein-Ligand Interaction"])
         with p2_tabs[0]:
-            col1, col2 = st.columns(2)
-            with col1:
-                st.subheader("Virtual Screening Results (Simulated)")
-                st.dataframe(res2.get('screening_df', pd.DataFrame()), use_container_width=True, hide_index=True)
-            with col2:
-                st.subheader("ADMET Properties Prediction")
-                st.dataframe(res2.get('admet_df', pd.DataFrame()), use_container_width=True, hide_index=True)
         with p2_tabs[1]:
-            molecules_used = res2.get('molecules_used', {})
-            if molecules_used:
-                st.subheader(f"Interactive 2D and 3D views of {len(molecules_used)} candidate molecules")
-                st.info(f"Currently visualizing: {', '.join(molecules_used.keys())}")
             else:
-                st.subheader("Interactive 2D and 3D views of candidate molecules")
-            if res2.get('combined_viz_html'):
-                st.components.v1.html(res2.get('combined_viz_html'), height=len(molecules_used) * 400 + 100, scrolling=True)
-            else:
-                st.warning("No molecule visualizations available. Please run the analysis first.")
-        with p2_tabs[2]:
-            st.subheader("Detailed view of the top candidate binding to the protein.")
-            if res2.get('interaction_html'):
-                st.components.v1.html(res2.get('interaction_html'), height=700, scrolling=False)
-            else:
-                st.warning("No protein-ligand interaction view available. Please run the analysis first.")
-# ===== TAB 3: PRECLINICAL DEVELOPMENT =====
 with tab3:
-    st.header("🧪 Step 3: In-Depth Candidate Analysis and Toxicity Prediction")
-    st.markdown("Perform a comprehensive analysis of the most promising lead compounds and use a simulated AI model to predict toxicity risk.")
-    with st.form(key="phase3_form"):
-        st.subheader("Analysis Controls")
-        run_phase3_btn = st.form_submit_button("🚀 Run Phase 3 Analysis", use_container_width=True)
-    if run_phase3_btn:
-        full_log = "--- Starting Phase 3 Analysis ---\n"
-        smiles_dict = get_phase3_molecules()
-        comp_props_df, log_comp_props = calculate_comprehensive_properties(smiles_dict)
-        full_log += log_comp_props
-        tox_df, log_tox = predict_toxicity(comp_props_df)
-        full_log += log_tox
-        combined_viz_html = ""
-        log_viz_p3 = ""
-        for name, smiles in smiles_dict.items():
-            html_block, log_mol_viz_p3 = visualize_molecule_2d_3d(smiles, name)
-            combined_viz_html += html_block
-            log_viz_p3 += log_mol_viz_p3
-        full_log += log_viz_p3
-        full_log += "\n--- Phase 3 Analysis Complete ---"
-        st.session_state.log_p3 = full_log
-        st.session_state.results_p3 = {
-            'comp_props_df': comp_props_df,
-            'tox_df': tox_df,
-            'combined_viz_html': combined_viz_html
-        }
     st.text_area("Status & Logs", st.session_state.log_p3, height=200, key="log_p3_area")
-    if st.session_state.results_p3:
         res3 = st.session_state.results_p3
-        p3_tabs = st.tabs(["Comprehensive Properties & Toxicity", "Molecule Visualization (3D Gallery)"])
-        with p3_tabs[0]:
-            st.subheader("Comprehensive Molecular Properties & AI-Powered Toxicity Prediction (Simulated)")
-            col1, col2 = st.columns(2)
-            with col1:
-                st.dataframe(res3.get('comp_props_df', pd.DataFrame()), use_container_width=True, hide_index=True)
-            with col2:
-                st.dataframe(res3.get('tox_df', pd.DataFrame()), use_container_width=True, hide_index=True)
-        with p3_tabs[1]:
-            st.subheader("Interactive 3D gallery of the compounds under analysis.")
-            if res3.get('combined_viz_html'):
-                st.components.v1.html(res3.get('combined_viz_html'), height=1000, scrolling=True)
-# ===== TAB 4: POST-MARKET SURVEILLANCE =====
-with tab4:
-    st.header("📈 Step 4: Regulatory Submission and Pharmacovigilance")
-    st.markdown("Explore summaries of the documentation needed for regulatory approval and simulate how AI can monitor real-world data for adverse events.")
-    with st.form(key="phase4_form"):
-        st.subheader("Analysis Controls")
-        rwd_input = st.text_area("Enter new adverse events (comma-separated)", value="severe allergic reaction, joint pain, severe allergic reaction", height=100, key="p4_rwd")
-        run_phase4_btn = st.form_submit_button("🚀 Run Phase 4 Analysis", use_container_width=True)
-    if run_phase4_btn:
-        full_log = "--- Starting Phase 4 Analysis ---\n"
-        reg_df, log_reg = get_regulatory_summary()
-        full_log += log_reg
-        eth_df, log_eth = get_ethical_framework()
-        full_log += log_eth
-        rwd_df, plot_bar, log_rwd = simulate_rwd_analysis(rwd_input)
-        full_log += log_rwd
-        full_log += "\n--- Phase 4 Analysis Complete ---"
-        st.session_state.log_p4 = full_log
-        st.session_state.results_p4 = {
-            'rwd_df': rwd_df,
-            'plot_bar': plot_bar,
-            'reg_df': reg_df,
-            'eth_df': eth_df
-        }
     st.text_area("Status & Logs", st.session_state.log_p4, height=200, key="log_p4_area")
-    if st.session_state.results_p4:
         res4 = st.session_state.results_p4
         p4_tabs = st.tabs(["Pharmacovigilance Analysis", "Regulatory & Ethical Frameworks"])
         with p4_tabs[0]:
             st.subheader("Simulated Adverse Event Analysis")
             if res4.get('plot_bar'):
-                st.pyplot(res4['plot_bar'])
             st.dataframe(res4.get('rwd_df', pd.DataFrame()), use_container_width=True, hide_index=True)
         with p4_tabs[1]:
-            col1, col2 = st.columns(2)
-            with col1:
-                st.subheader("AI/ML Documentation Summary for Submission")
-                st.dataframe(res4.get('reg_df', pd.DataFrame()), use_container_width=True, hide_index=True)
-            with col2:
-                st.subheader("Ethical Framework for AI in Healthcare")
-                st.dataframe(res4.get('eth_df', pd.DataFrame()), use_container_width=True, hide_index=True)

 # AI-Powered Drug Discovery Pipeline Streamlit Application
 # This script integrates four phases of drug discovery into a single, interactive web app.
 import streamlit as st
 import pandas as pd
 import numpy as np
 import requests
 import io
 import re
     page_title="AI Drug Discovery Pipeline",
     page_icon="🔬",
     layout="wide",
+    initial_sidebar_state="collapsed",
 )
+# Custom CSS for a professional, dark theme
 def apply_custom_styling():
     st.markdown(
         """
         <style>
         @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
         html, body, [class*="st-"] {
             font-family: 'Roboto', sans-serif;
         }
             color: #FFF;
             background-color: #00A0FF;
         }
+        /* Ensure headers are white */
+        h1, h2, h3, h4, h5, h6 {
+            color: white !important;
+        }
         </style>
         """,
         unsafe_allow_html=True
         mol = Chem.MolFromSmiles(smiles)
         if mol:
             props = {
+                'Molecule': name,
                 'SMILES': smiles,
                 'MW': Descriptors.MolWt(mol),
                 'LogP': Descriptors.MolLogP(mol),
     if df.empty:
         return pd.DataFrame(), pd.DataFrame(), "Cannot assess drug-likeness: No properties data."
     analysis_df = df.copy()
     analysis_df['MW_OK'] = analysis_df['MW'] <= 500
     analysis_df['LogP_OK'] = analysis_df['LogP'] <= 5
     analysis_df['HBA_OK'] = analysis_df['HBA'] <= 10
     analysis_df['Lipinski_Violations'] = (~analysis_df[['MW_OK', 'LogP_OK', 'HBD_OK', 'HBA_OK']]).sum(axis=1)
     analysis_df['Drug_Like'] = analysis_df['Lipinski_Violations'] <= 1
     display_df = df.copy()
     display_df['Lipinski_Violations'] = analysis_df['Lipinski_Violations']
     display_df['Drug_Like'] = analysis_df['Drug_Like'].apply(lambda x: '✅ Yes' if x else '❌ No')
     log = "✅ Assessed drug-likeness using Lipinski's Rule of Five.\n"
     return analysis_df, display_df, log
     if df['Drug_Like'].dtype != bool:
         return None, f"Cannot plot: 'Drug_Like' column must be boolean, but it is {df['Drug_Like'].dtype}."
     df['Category'] = df['Drug_Like'].apply(lambda x: 'Drug-Like' if x else 'Non-Drug-Like')
     source = ColumnDataSource(df)
+    colors = ['#00D4AA', '#FF6B6B']
     color_mapper = factor_cmap('Category', palette=colors, factors=["Drug-Like", "Non-Drug-Like"])
     scatter_hover = HoverTool(tooltips=[
+        ("Compound", "@Molecule"), ("MW", "@MW{0.0} Da"), ("LogP", "@LogP{0.00}"),
+        ("HBD", "@HBD"), ("HBA", "@HBA"), ("TPSA", "@TPSA{0.0} Å²"), ("Category", "@Category")
     ])
     plot_config = {
+        'sizing_mode': 'scale_width', 'aspect_ratio': 1,
+        'background_fill_color': None, 'border_fill_color': None,
+        'outline_line_color': '#333333', 'min_border_left': 50,
+        'min_border_right': 50, 'min_border_top': 50, 'min_border_bottom': 50
     }
     def style_plot(p, x_label, y_label, title):
             p.legend.background_fill_alpha = 0.8
             p.legend.border_line_color = '#444444'
             p.legend.label_text_color = '#FFFFFF'
             p.legend.click_policy = "mute"
         return p
     p1 = figure(title="Molecular Weight vs LogP", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
     p1.scatter('MW', 'LogP', source=source, legend_group='Category',
                color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
+    p1.line([500, 500], [df['LogP'].min()-0.5, df['LogP'].max()+0.5], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="MW ≤ 500")
+    p1.line([df['MW'].min()-50, df['MW'].max()+50], [5, 5], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="LogP ≤ 5")
     style_plot(p1, "Molecular Weight (Da)", "LogP", "Lipinski Rule: MW vs LogP")
     p2 = figure(title="Hydrogen Bonding Profile", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
+    p2.scatter('HBD', 'HBA', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
+    p2.line([5, 5], [df['HBA'].min()-1, df['HBA'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="HBD ≤ 5")
+    p2.line([df['HBD'].min()-1, df['HBD'].max()+1], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="HBA ≤ 10")
     style_plot(p2, "Hydrogen Bond Donors", "Hydrogen Bond Acceptors", "Lipinski Rule: Hydrogen Bonding")
     p3 = figure(title="Molecular Flexibility & Polarity", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
+    p3.scatter('TPSA', 'RotBonds', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
+    p3.line([140, 140], [df['RotBonds'].min()-1, df['RotBonds'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="TPSA ≤ 140")
+    p3.line([df['TPSA'].min()-10, df['TPSA'].max()+10], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="RotBonds ≤ 10")
     style_plot(p3, "Topological Polar Surface Area (Å²)", "Rotatable Bonds", "Drug Permeability Indicators")
     p4_config = plot_config.copy()
+    p4_config['tools'] = "hover"
     p4_config.update({'x_range': (-1.0, 1.0), 'y_range': (-1.0, 1.0)})
     p4 = figure(title="Drug-Likeness Distribution", **p4_config)
     counts = df['Category'].value_counts()
+    data = pd.DataFrame({'category': counts.index, 'value': counts.values})
+    data['angle'] = data['value']/data['value'].sum() * 2*pi
+    data['color'] = [colors[0] if cat == 'Drug-Like' else colors[1] for cat in counts.index]
     wedge_renderer = p4.annular_wedge(x=0, y=0, inner_radius=0.25, outer_radius=0.45,
+                     start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
                      line_color="white", line_width=3, fill_color='color',
+                     legend_field='category', source=data)
+    p4.text([0], [0], text=[f"{len(df)}\nCompounds"], text_align="center", text_baseline="middle", text_color="white", text_font_size="14pt", text_font_style="bold")
+    donut_hover = HoverTool(tooltips=[("Category", "@category"), ("Count", "@value")], renderers=[wedge_renderer])
     p4.add_tools(donut_hover)
     style_plot(p4, "", "", "Compound Classification")
     p4.axis.visible = False
     p4.grid.visible = False
+    grid = gridplot([[p1, p2], [p3, p4]], sizing_mode='scale_width', toolbar_location='right', merge_tools=True)
     return grid, "✅ Generated enhanced molecular properties dashboard."
 # ===== Phase 2 Functions =====
 def get_phase2_molecules():
+    """Returns an expanded list of common drugs with corrected SMILES.
+
+    The result is a dict mapping each drug name to its SMILES string; a new
+    dict is built on every call.
+
+    NOTE(review): "corrected" is the original author's claim and nothing in
+    this function validates the strings. Some entries look suspect (for
+    example the Furosemide string contains a three-membered ``C2CO2`` ring) --
+    confirm each entry against a reference database before relying on it.
+    """
     return {
+        'Paracetamol': 'CC(=O)Nc1ccc(O)cc1', 'Ibuprofen': 'CC(C)Cc1ccc(C(C)C(=O)O)cc1',
+        'Aspirin': 'CC(=O)Oc1ccccc1C(=O)O', 'Naproxen': 'C[C@H](C(=O)O)c1ccc2cc(OC)ccc2c1',
+        'Diazepam': 'CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc12', 'Metformin': 'CN(C)C(=N)N=C(N)N',
+        'Loratadine': 'CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1', 'Morphine': 'C[N@]1CC[C@]23c4c5ccc(O)c4O[C@H]2[C@@H](O)C=C[C@H]3[C@H]1C5',
+        'Cetirizine': 'O=C(O)COCCOc1ccc(cc1)C(c1ccccc1)N1CCN(CC1)CCO', 'Fluoxetine': 'CNCCC(c1ccccc1)Oc1ccc(C(F)(F)F)cc1',
+        'Amoxicillin': 'C[C@@]1([C@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](N)c3ccc(O)cc3)C(=O)O)C', 'Atorvastatin': 'CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1',
+        'Ciprofloxacin': 'O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc12', 'Warfarin': 'O=C(c1ccccc1)C(c1oc2ccccc2c1=O)C',
         'Furosemide': 'O=C(O)c1cc(Cl)c(NC2CO2)c(c1)S(=O)(=O)N',
     }
     log += f"✅ Predicted ADMET properties for {len(df)} molecules.\n"
     return df, log
 def visualize_molecule_2d_3d(smiles: str, name: str):
     """Generates a side-by-side 2D SVG and 3D py3Dmol HTML view for a single molecule."""
     log = ""
         return combined_html, log
     except Exception as e:
         return f"<p>Error visualizing {name}: {e}</p>", f"❌ Error visualizing {name}: {e}"
+def visualize_protein_ligand_interaction(pdb_data: str, pdb_id: str, ligand_resn: str):
+    """
+    Generates a protein-ligand interaction visualization using py3Dmol.
+    """
+    if not pdb_data:
+        return None, "Cannot generate interaction view: No PDB data provided."
     try:
+        viewer = py3Dmol.view(width='100%', height=650)
         viewer.setBackgroundColor('#1C1C1C')
+        # Add the protein structure
         viewer.addModel(pdb_data, "pdb")
+        # Style the protein (cartoon representation)
+        viewer.setStyle({'cartoon': {'color': 'lightblue', 'opacity': 0.8}})
+        # Highlight the ligand if specified
+        if ligand_resn:
+            viewer.addStyle({'resn': ligand_resn}, {'stick': {'colorscheme': 'greenCarbon', 'radius': 0.2}})
+            viewer.addStyle({'resn': ligand_resn}, {'sphere': {'scale': 0.3, 'colorscheme': 'greenCarbon'}})
+        # Add surface representation for binding site
+        viewer.addSurface(py3Dmol.VDW, {'opacity': 0.2, 'color': 'white'}, {'resn': ligand_resn})
+        viewer.zoomTo({'resn': ligand_resn} if ligand_resn else {})
         html = viewer._make_html()
+        log = f"✅ Generated protein-ligand interaction view for {pdb_id} with ligand {ligand_resn}."
         return html, log
     except Exception as e:
+        return None, f"❌ Interaction visualization error: {e}"
 # ===== Phase 3 Functions =====
 def get_phase3_molecules():
     return {
     return pd.DataFrame(summary), "✅ Generated AI/ML documentation summary."
 def simulate_rwd_analysis(adverse_event_text):
+    """
+    Analyzes simulated adverse event text and generates a DataFrame and Bokeh plot.
+    """
     np.random.seed(42)
+    base_events = list(np.random.choice(
+        ['headache', 'nausea', 'fatigue', 'dizziness', 'rash', 'fever'],
+        100,
+        p=[0.25, 0.2, 0.15, 0.15, 0.1, 0.15]
+    ))
+    user_terms = [word.lower() for word in re.findall(r'\b[a-zA-Z]{3,}\b', adverse_event_text)]
+    all_events = base_events + user_terms
+    events_df = pd.DataFrame(all_events, columns=['Adverse_Event'])
+    event_counts = events_df['Adverse_Event'].value_counts().nlargest(10).sort_values(ascending=False)
+    results_df = event_counts.reset_index()
+    results_df.columns = ['Adverse_Event', 'Frequency']
+    log = f"✅ Analyzed {len(all_events)} total event reports. Identified {len(event_counts)} unique adverse events for plotting.\n"
+    # Create Bokeh Plot
+    source = ColumnDataSource(results_df)
+    y_range = results_df['Adverse_Event'].tolist()[::-1]
+    hover = HoverTool(tooltips=[("Event", "@Adverse_Event"),("Frequency", "@Frequency")])
+    p = figure(
+        y_range=y_range, height=450, title="Top 10 Reported Adverse Events",
+        sizing_mode='stretch_width', tools="pan,wheel_zoom,box_zoom,reset,save",
+    )
+    p.add_tools(hover)
+    p.hbar(
+        y='Adverse_Event', right='Frequency', source=source, height=0.7,
+        color='#00A0FF', line_color='white', legend_label="Event Frequency"
+    )
+    # Style the plot for a dark theme
+    p.background_fill_color = "#1C1C1C"
+    p.border_fill_color = "#1C1C1C"
+    p.outline_line_color = '#333333'
+    p.title.text_color = "white"
+    p.title.text_font_size = '16pt'
+    p.title.align = "center"
+    p.xaxis.axis_label = "Frequency Count"
+    p.yaxis.axis_label = "Adverse Event"
+    p.axis.axis_label_text_color = "#CCCCCC"
+    p.axis.axis_label_text_font_size = "12pt"
+    p.axis.major_label_text_color = "#AAAAAA"
+    p.axis.major_label_text_font_size = "10pt"
+    p.grid.grid_line_alpha = 0.3
+    p.grid.grid_line_color = "#444444"
+    p.x_range.start = 0
+    p.legend.location = "top_right"
+    p.legend.background_fill_color = "#2A2A2A"
+    p.legend.background_fill_alpha = 0.7
+    p.legend.border_line_color = "#444444"
+    p.legend.label_text_color = "white"
+    return results_df, p, log
+def get_ethical_framework():
+    """
+    Builds the static ethical-AI framework table displayed in Phase 4.
+
+    Returns:
+        ``(DataFrame, log)``: a four-row DataFrame with 'Principle' and
+        'Implementation Strategy' columns, and a one-line status message.
+    """
+    # The two lists are positional pairs: strategy i describes principle i.
+    framework = {'Principle': ['Beneficence', 'Non-maleficence', 'Fairness', 'Transparency'],
+                 'Implementation Strategy': [
+                     'AI models prioritize patient outcomes and clinical efficacy.',
+                     'Toxicity prediction and pharmacovigilance models aim to minimize patient harm.',
+                     'Algorithms are audited for demographic bias in training data and predictions.',
+                     'Model cards and SHAP values are provided for key decision-making processes.'
+                 ]}
+    return pd.DataFrame(framework), "✅ Generated Ethical AI Framework summary."
+# --- 3. Streamlit UI Layout ---
+# Initialize session state variables
+if 'active_tab' not in st.session_state: st.session_state.active_tab = "Phase 1: Target Identification"
+if 'log_p1' not in st.session_state: st.session_state.log_p1 = "Status logs will appear here."
+if 'log_p2' not in st.session_state: st.session_state.log_p2 = "Status logs will appear here."
+if 'log_p3' not in st.session_state: st.session_state.log_p3 = "Status logs will appear here."
+if 'log_p4' not in st.session_state: st.session_state.log_p4 = "Status logs will appear here."
 if 'results_p1' not in st.session_state: st.session_state.results_p1 = {}
 if 'results_p2' not in st.session_state: st.session_state.results_p2 = {}
 if 'results_p3' not in st.session_state: st.session_state.results_p3 = {}
 if 'results_p4' not in st.session_state: st.session_state.results_p4 = {}
+# --- Header ---
+st.title("🔬 AI-Powered Drug Discovery Pipeline")
+st.markdown("An integrated application demonstrating a four-phase computational drug discovery workflow.")
+# --- Main Tabs for Each Phase ---
 tab1, tab2, tab3, tab4 = st.tabs([
+    "**Phase 1:** Target Identification",
+    "**Phase 2:** Hit Discovery & ADMET",
+    "**Phase 3:** Lead Optimization",
+    "**Phase 4:** Pre-clinical & RWE"
 ])
+# --- Phase 1: Target Identification ---
 with tab1:
+    st.header("Phase 1: Target Identification & Initial Analysis")
+    st.markdown("""
+    In this initial phase, we identify and analyze a biological target (e.g., a protein) implicated in a disease.
+    We fetch its 3D structure and sequence data, then evaluate a set of initial compounds for their drug-like properties.
+    """)
+    st.subheader("Inputs & Controls")
+    pdb_id_input = st.text_input("Enter PDB ID (e.g., 2HU4 for Neuraminidase)", "2HU4")
+    protein_id_input = st.text_input("Enter NCBI Protein ID (e.g., P03468 for Neuraminidase)", "P03468")
+    st.markdown("---")
+    st.write("**Analyze Sample Compounds:**")
+    sample_molecules = create_sample_molecules()
+    selected_molecules = st.multiselect(
+        "Select from known drugs:",
+        options=list(sample_molecules.keys()),
+        default=["Oseltamivir", "Aspirin"]
+    )
+    if st.button("🚀 Run Phase 1 Analysis", key="run_p1"):
+        with st.spinner("Fetching data and calculating properties..."):
+            full_log = "--- Phase 1 Analysis Started ---\n"
+            pdb_data, log_pdb = fetch_pdb_structure(pdb_id_input)
+            full_log += log_pdb
+            log_fasta = fetch_fasta_sequence(protein_id_input)
+            full_log += log_fasta
+            smiles_to_analyze = {name: sample_molecules[name] for name in selected_molecules}
+            properties_df, log_props = calculate_molecular_properties(smiles_to_analyze)
             full_log += log_props
+            analysis_df, display_df, log_likeness = assess_drug_likeness(properties_df)
+            full_log += log_likeness
+            protein_view_html, log_3d = visualize_protein_3d(pdb_data, title=f"PDB: {pdb_id_input}")
+            full_log += log_3d
+            dashboard_plot, log_dash = plot_properties_dashboard(analysis_df)
+            full_log += log_dash
+            full_log += "\n--- Phase 1 Analysis Complete ---"
+            st.session_state.log_p1 = full_log
             st.session_state.results_p1 = {
+                'pdb_data': pdb_data,
+                'protein_view': protein_view_html,
+                'properties_df': display_df,
+                'dashboard': dashboard_plot
             }
     st.text_area("Status & Logs", st.session_state.log_p1, height=200, key="log_p1_area")
+    st.subheader("Results")
+    if not st.session_state.results_p1:
+        st.info("Click 'Run Phase 1 Analysis' to generate and display results.")
+    else:
         res1 = st.session_state.results_p1
+        p1_tabs = st.tabs(["Protein Structure", "Compound Properties Dashboard"])
         with p1_tabs[0]:
+            st.subheader(f"3D Structure for PDB ID: {pdb_id_input}")
+            if res1.get('protein_view'):
+                st.components.v1.html(res1['protein_view'], height=600, scrolling=False)
             else:
+                st.warning("Could not display 3D structure. Check PDB ID and logs.")
         with p1_tabs[1]:
+            st.subheader("Physicochemical Properties Analysis")
+            if res1.get('dashboard'):
+                st.bokeh_chart(res1['dashboard'], use_container_width=True)
+            st.dataframe(res1.get('properties_df', pd.DataFrame()), use_container_width=True, hide_index=True)
+# --- Phase 2: Hit Discovery & ADMET ---
 with tab2:
+    st.header("Phase 2: Virtual Screening & Early ADMET")
+    st.markdown("""
+    This phase simulates a virtual screening process to identify 'hits' from a larger library of compounds.
+    We predict their binding affinity to the target and assess their basic ADMET (Absorption, Distribution,
+    Metabolism, Excretion, Toxicity) profiles.
+    """)
+    st.subheader("Inputs & Controls")
+    p2_molecules = get_phase2_molecules()
+    st.info(f"A library of {len(p2_molecules)} compounds is ready for screening.")
+    p2_pdb_id = st.text_input("Enter PDB ID for Interaction (e.g., 2HU4)", "2HU4", key="p2_pdb")
+    p2_ligand_resn = st.text_input("Ligand Residue Name in PDB (e.g., G39 for Oseltamivir)", "G39", key="p2_ligand")
+    if st.button("🚀 Run Phase 2 Analysis", key="run_p2"):
+        with st.spinner("Running virtual screening and ADMET predictions..."):
+            full_log = "--- Phase 2 Analysis Started ---\n"
+            screening_df, log_screen = simulate_virtual_screening(p2_molecules)
+            full_log += log_screen
+            admet_df, log_admet = predict_admet_properties(p2_molecules)
             full_log += log_admet
+            merged_df = pd.merge(screening_df, admet_df, on="Molecule")
+            pdb_data, log_pdb_p2 = fetch_pdb_structure(p2_pdb_id)
+            full_log += log_pdb_p2
+            interaction_view, log_interact = visualize_protein_ligand_interaction(pdb_data, p2_pdb_id, p2_ligand_resn)
+            full_log += log_interact
+            full_log += "\n--- Phase 2 Analysis Complete ---"
+            st.session_state.log_p2 = full_log
             st.session_state.results_p2 = {
+                'merged_df': merged_df,
+                'interaction_view': interaction_view
             }
     st.text_area("Status & Logs", st.session_state.log_p2, height=200, key="log_p2_area")
+    st.subheader("Results")
+    if not st.session_state.results_p2:
+        st.info("Click 'Run Phase 2 Analysis' to generate and display results.")
+    else:
         res2 = st.session_state.results_p2
+        p2_tabs = st.tabs(["Screening & ADMET Results", "Protein-Ligand Interaction"])
         with p2_tabs[0]:
+            st.subheader("Virtual Screening & Early ADMET Predictions")
+            st.dataframe(res2.get('merged_df', pd.DataFrame()), use_container_width=True, hide_index=True)
         with p2_tabs[1]:
+            st.subheader(f"Simulated Interaction for PDB {p2_pdb_id} with Ligand {p2_ligand_resn}")
+            if res2.get('interaction_view'):
+                st.components.v1.html(res2['interaction_view'], height=700, scrolling=False)
             else:
+                st.warning("Could not display interaction view. Check inputs and logs.")
+# --- Phase 3: Lead Optimization ---
 with tab3:
+    st.header("Phase 3: Lead Compound Optimization")
+    st.markdown("""
+    In lead optimization, promising 'hit' compounds are refined to improve their efficacy and safety.
+    Here, we analyze a few selected lead candidates, perform more detailed property calculations,
+    and predict their toxicity risk using a simulated machine learning model.
+    """)
+    st.subheader("Inputs & Controls")
+    p3_molecules = get_phase3_molecules()
+    selected_leads = st.multiselect(
+        "Select lead compounds to optimize:",
+        options=list(p3_molecules.keys()),
+        default=['Oseltamivir', 'Remdesivir']
+    )
+    if st.button("🚀 Run Phase 3 Analysis", key="run_p3"):
+        with st.spinner("Analyzing lead compounds and predicting toxicity..."):
+            full_log = "--- Phase 3 Analysis Started ---\n"
+            smiles_to_analyze_p3 = {name: p3_molecules[name] for name in selected_leads}
+            comp_props_df, log_comp = calculate_comprehensive_properties(smiles_to_analyze_p3)
+            full_log += log_comp
+            toxicity_df, log_tox = predict_toxicity(comp_props_df)
+            full_log += log_tox
+            final_df = pd.merge(comp_props_df, toxicity_df, on="Compound")
+            visuals = {}
+            for name, smiles in smiles_to_analyze_p3.items():
+                html_view, log_vis = visualize_molecule_2d_3d(smiles, name)
+                visuals[name] = html_view
+                full_log += log_vis
+            full_log += "\n--- Phase 3 Analysis Complete ---"
+            st.session_state.log_p3 = full_log
+            st.session_state.results_p3 = {
+                'final_df': final_df,
+                'visuals': visuals
+            }
     st.text_area("Status & Logs", st.session_state.log_p3, height=200, key="log_p3_area")
+    st.subheader("Results")
+    if not st.session_state.results_p3:
+        st.info("Click 'Run Phase 3 Analysis' to generate and display results.")
+    else:
         res3 = st.session_state.results_p3
+        st.subheader("Lead Compound Analysis & Toxicity Prediction")
+        st.dataframe(res3.get('final_df', pd.DataFrame()), use_container_width=True, hide_index=True)
+        st.subheader("2D & 3D Molecular Structures")
+        for name, visual_html in res3.get('visuals', {}).items():
+            st.components.v1.html(visual_html, height=430, scrolling=False)
+# --- Phase 4: Pre-clinical & RWE ---
+# Layout: input text -> run button (recomputes and stores results in session
+# state) -> log box -> results rendered from whatever the last run stored.
+with tab4:
+    st.header("Phase 4: Simulated Pre-clinical & Real-World Evidence (RWE)")
+    st.markdown("""
+    This final phase simulates post-market analysis. We analyze text data for adverse events (pharmacovigilance)
+    and present documentation related to the AI models and ethical frameworks that would be required for regulatory submission.
+    """)
+    st.subheader("Inputs & Controls")
+    rwd_input = st.text_area(
+        "Enter simulated adverse event report text:",
+        "Patient reports include instances of headache, severe nausea, and occasional skin rash. Some noted dizziness after taking the medication.",
+        height=150
+    )
+    if st.button("🚀 Run Phase 4 Analysis", key="run_p4"):
+        with st.spinner("Analyzing real-world data and generating reports..."):
+            # Each helper returns its data plus a log fragment; the fragments
+            # are concatenated into a single status text for the log box.
+            full_log = "--- Phase 4 Analysis Started ---\n"
+            reg_df, log_reg = get_regulatory_summary()
+            full_log += log_reg
+            eth_df, log_eth = get_ethical_framework()
+            full_log += log_eth
+            rwd_df, plot_bar, log_rwd = simulate_rwd_analysis(rwd_input)
+            full_log += log_rwd
+            full_log += "\n--- Phase 4 Analysis Complete ---"
+            # Stored in session state so the rendering code below can read
+            # them outside the button branch.
+            st.session_state.log_p4 = full_log
+            st.session_state.results_p4 = {
+                'rwd_df': rwd_df,
+                'plot_bar': plot_bar,
+                'reg_df': reg_df,
+                'eth_df': eth_df
+            }
     st.text_area("Status & Logs", st.session_state.log_p4, height=200, key="log_p4_area")
+    st.subheader("Results")
+    # An empty results dict means no analysis has been stored yet.
+    if not st.session_state.results_p4:
+        st.info("Click 'Run Phase 4 Analysis' to generate and display results.")
+    else:
         res4 = st.session_state.results_p4
         p4_tabs = st.tabs(["Pharmacovigilance Analysis", "Regulatory & Ethical Frameworks"])
         with p4_tabs[0]:
             st.subheader("Simulated Adverse Event Analysis")
+            # The chart is optional; the frequency table is always shown.
             if res4.get('plot_bar'):
+                st.bokeh_chart(res4['plot_bar'], use_container_width=True)
             st.dataframe(res4.get('rwd_df', pd.DataFrame()), use_container_width=True, hide_index=True)
         with p4_tabs[1]:
+            st.subheader("AI/ML Model Regulatory Summary")
+            st.dataframe(res4.get('reg_df', pd.DataFrame()), use_container_width=True, hide_index=True)
+            st.subheader("Ethical AI Framework")
+            st.dataframe(res4.get('eth_df', pd.DataFrame()), use_container_width=True, hide_index=True)