Spaces:
Sleeping
Sleeping
| # app.py | |
| # AI-Powered Drug Discovery Pipeline Streamlit Application | |
| # This script integrates four phases of drug discovery into a single, interactive web app. | |
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import requests | |
| import io | |
| import re | |
| from PIL import Image | |
| import base64 | |
| # RDKit and BioPython imports | |
| from rdkit import Chem | |
| from rdkit.Chem import Draw, AllChem, Descriptors | |
| from Bio import SeqIO | |
| # Scikit-learn for ML models | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.model_selection import train_test_split | |
| # 3D Visualization | |
| import py3Dmol | |
| # Bokeh plotting | |
| from bokeh.plotting import figure | |
| from bokeh.models import ColumnDataSource, HoverTool | |
| from bokeh.layouts import gridplot | |
| from bokeh.transform import factor_cmap, cumsum | |
| from math import pi | |
| # Suppress warnings for cleaner output | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
| # --- Page Configuration --- | |
# Configure the browser tab (title + microscope icon) and use the wide layout.
# The icon literal was mis-encoded in this copy of the file; it is restored here.
st.set_page_config(
    page_title="AI Drug Discovery Pipeline",
    page_icon="🔬",
    layout="wide",
    initial_sidebar_state="collapsed",  # Sidebar is removed, but this is good practice
)
| # Custom CSS for a professional, minimalist look | |
def apply_custom_styling():
    """Inject the app-wide dark-theme CSS (font, tabs, buttons) into the page."""
    # The stylesheet is kept flush-left on purpose: it is passed to
    # st.markdown verbatim.
    stylesheet = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
html, body, [class*="st-"] {
font-family: 'Roboto', sans-serif;
}
.stApp {
background-color: rgb(28, 28, 28);
color: white;
}
/* Tab styles */
.stTabs [data-baseweb="tab-list"] {
gap: 24px;
}
.stTabs [data-baseweb="tab"] {
height: 50px;
white-space: pre-wrap;
background: none;
border-radius: 0px;
border-bottom: 2px solid #333;
padding: 10px 4px;
color: #AAA;
}
.stTabs [data-baseweb="tab"]:hover {
background: #222;
color: #FFF;
}
.stTabs [aria-selected="true"] {
border-bottom: 2px solid #00A0FF; /* Highlight color for active tab */
color: #FFF;
}
/* Button styles */
.stButton>button {
border-color: #00A0FF;
color: #00A0FF;
}
.stButton>button:hover {
border-color: #FFF;
color: #FFF;
background-color: #00A0FF;
}
</style>
"""
    st.markdown(stylesheet, unsafe_allow_html=True)


# Apply the styling once at import time, before any widgets are drawn.
apply_custom_styling()
| # --- 2. Core Functions from All Phases --- | |
| # These functions are adapted from the provided Python scripts. | |
| # ===== Phase 1 Functions ===== | |
def fetch_pdb_structure(pdb_id: str):
    """
    Fetches a PDB file from RCSB and returns its content.

    Args:
        pdb_id: PDB identifier (e.g. "3B7E"). Surrounding whitespace is ignored.

    Returns:
        A ``(pdb_text, log)`` tuple. ``pdb_text`` is ``None`` when the ID is
        empty, the server answers with a non-200 status, or the request fails;
        ``log`` always carries a human-readable status line.
    """
    from urllib.parse import quote

    pdb_id = (pdb_id or "").strip()
    if not pdb_id:
        # Nothing to look up; avoid hitting the network with a malformed URL.
        return None, "⚠️ No PDB ID provided.\n"

    # The ID comes straight from a text box, so quote it before it becomes
    # part of the URL path.
    url = f"https://files.rcsb.org/download/{quote(pdb_id, safe='')}.pdb"
    try:
        response = requests.get(url, timeout=20)
    except requests.RequestException as e:
        return None, f"❌ An error occurred while fetching PDB data: {e}\n"

    if response.status_code == 200:
        return response.text, f"✅ Successfully fetched PDB data for {pdb_id}.\n"
    return None, (
        f"⚠️ Failed to fetch PDB file for {pdb_id} (Status: {response.status_code}). "
        "Please check the PDB ID and try again.\n"
    )
def fetch_fasta_sequence(protein_id: str):
    """
    Fetches a protein's FASTA sequence from NCBI E-utilities.

    Args:
        protein_id: NCBI protein accession (e.g. "ACF54602.1"). Surrounding
            whitespace is ignored.

    Returns:
        A log string: a short summary of the record (ID, description, length,
        first 60 residues) on success, or a warning/error line otherwise.
    """
    protein_id = (protein_id or "").strip()
    if not protein_id:
        return "⚠️ No protein ID provided; skipping FASTA fetch.\n"

    try:
        # Pass the query as `params` so the user-supplied ID is URL-encoded
        # instead of being spliced into the query string verbatim.
        response = requests.get(
            "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
            params={"db": "protein", "id": protein_id, "rettype": "fasta", "retmode": "text"},
            timeout=20,
        )
        if response.status_code != 200:
            return f"⚠️ Failed to fetch FASTA file (Status: {response.status_code}).\n"
        # SeqIO.read raises ValueError unless the payload holds exactly one record.
        parsed_fasta = SeqIO.read(io.StringIO(response.text), "fasta")
    except (requests.RequestException, ValueError) as e:
        return f"❌ An error occurred while fetching FASTA data: {e}\n"

    return (
        f"✅ Successfully fetched FASTA sequence for {protein_id}.\n\n"
        "--- Protein Sequence Information ---\n"
        f"ID: {parsed_fasta.id}\n"
        f"Description: {parsed_fasta.description}\n"
        f"Sequence Length: {len(parsed_fasta.seq)}\n"
        f"Sequence Preview: {parsed_fasta.seq[:60]}...\n"
    )
def visualize_protein_3d(pdb_data: str, title="Protein 3D Structure"):
    """
    Generates an interactive 3D protein visualization using py3Dmol.

    Args:
        pdb_data: PDB file content as text.
        title: Label used only in the returned log message.

    Returns:
        A ``(html, log)`` tuple; ``html`` is ``None`` when no PDB data was
        supplied or rendering raised.
    """
    if not pdb_data:
        return None, "Cannot generate 3D view: No PDB data provided."
    try:
        viewer = py3Dmol.view(width='100%', height=600)
        viewer.setBackgroundColor('#1C1C1C')
        viewer.addModel(pdb_data, "pdb")
        viewer.setStyle({'cartoon': {'color': 'spectrum', 'thickness': 0.8}})
        viewer.addSurface(py3Dmol.VDW, {'opacity': 0.3, 'color': 'lightblue'})
        viewer.zoomTo()
        # NOTE(review): _make_html is a private py3Dmol method; confirm it is
        # still available when upgrading the library.
        html = viewer._make_html()
        return html, f"✅ Generated 3D visualization for {title}."
    except Exception as e:
        # UI boundary: report the failure in the log instead of crashing the app.
        return None, f"❌ 3D visualization error: {e}"
def create_sample_molecules():
    """
    Returns the default Phase 1 molecules as a name -> SMILES dictionary.
    """
    sample_pairs = (
        ("Oseltamivir", "CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C"),
        ("Zanamivir", "C[C@H](N)C(=O)N[C@H]1[C@@H](O)C=C(O[C@H]1[C@@H](O)[C@H](O)CO)C(O)=O"),
        ("Aspirin", "CC(=O)OC1=CC=CC=C1C(=O)O"),
        ("Ibuprofen", "CC(C)CC1=CC=C(C=C1)C(C)C(=O)O"),
    )
    return dict(sample_pairs)
def calculate_molecular_properties(smiles_dict: dict):
    """
    Calculates key physicochemical properties for a dictionary of molecules using RDKit.

    Args:
        smiles_dict: Mapping of molecule name -> SMILES string.

    Returns:
        A ``(df, log)`` tuple. ``df`` has one row per parsable molecule with
        the columns Molecule, SMILES, MW, LogP, HBD, HBA, TPSA and RotBonds
        (rounded to two decimals). Unparsable SMILES are reported in ``log``
        and left out of ``df``.
    """
    properties = []
    log = ""
    for name, smiles in smiles_dict.items():
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            log += f"⚠️ Invalid SMILES string skipped for {name}: {smiles}\n"
            continue
        properties.append({
            'Molecule': name,  # Use the provided name
            'SMILES': smiles,
            'MW': Descriptors.MolWt(mol),
            'LogP': Descriptors.MolLogP(mol),
            'HBD': Descriptors.NumHDonors(mol),
            'HBA': Descriptors.NumHAcceptors(mol),
            'TPSA': Descriptors.TPSA(mol),
            'RotBonds': Descriptors.NumRotatableBonds(mol),
        })
    df = pd.DataFrame(properties).round(2)
    log += f"✅ Calculated properties for {len(df)} valid molecules.\n"
    return df, log
def assess_drug_likeness(df: pd.DataFrame):
    """
    Assesses drug-likeness based on Lipinski's Rule of Five.

    A molecule is considered drug-like when it violates at most one of:
    MW <= 500, LogP <= 5, HBD <= 5, HBA <= 10.

    Args:
        df: Properties table with at least the columns MW, LogP, HBD and HBA.
            It is not modified.

    Returns:
        ``(analysis_df, display_df, log)``:
        * ``analysis_df`` - copy of ``df`` plus the boolean ``*_OK`` columns,
          ``Lipinski_Violations`` and a boolean ``Drug_Like`` (for plotting).
        * ``display_df`` - copy of ``df`` plus ``Lipinski_Violations`` and a
          human-readable ``Drug_Like`` ("✅ Yes" / "❌ No") for tables.
        * ``log`` - status message.
        For an empty input two empty DataFrames are returned.
    """
    if df.empty:
        return pd.DataFrame(), pd.DataFrame(), "Cannot assess drug-likeness: No properties data."

    # Upper bounds of the four Lipinski criteria, in the column order used below.
    lipinski_limits = {'MW': 500, 'LogP': 5, 'HBD': 5, 'HBA': 10}

    # Work on copies so the caller's DataFrame is never modified.
    analysis_df = df.copy()
    ok_columns = []
    for column, upper_bound in lipinski_limits.items():
        ok_column = f'{column}_OK'
        analysis_df[ok_column] = analysis_df[column] <= upper_bound
        ok_columns.append(ok_column)

    analysis_df['Lipinski_Violations'] = (~analysis_df[ok_columns]).sum(axis=1)
    # Kept boolean on purpose: the plotting function checks for a bool dtype.
    analysis_df['Drug_Like'] = analysis_df['Lipinski_Violations'] <= 1

    # Separate frame for tables, with emoji labels instead of booleans.
    display_df = df.copy()
    display_df['Lipinski_Violations'] = analysis_df['Lipinski_Violations']
    display_df['Drug_Like'] = analysis_df['Drug_Like'].map({True: '✅ Yes', False: '❌ No'})

    log = "✅ Assessed drug-likeness using Lipinski's Rule of Five.\n"
    return analysis_df, display_df, log
def plot_properties_dashboard(df: pd.DataFrame):
    """Creates a 2x2 dashboard of molecular property visualizations using Bokeh.

    Panels: MW vs LogP, HBD vs HBA, TPSA vs rotatable bonds (each with dashed
    guideline thresholds) and a donut chart of drug-like vs non-drug-like counts.

    Args:
        df: Analysis table with a boolean ``Drug_Like`` column plus Molecule,
            MW, LogP, HBD, HBA, TPSA and RotBonds. It is not modified.

    Returns:
        ``(grid, log)``; ``grid`` is ``None`` when ``df`` is empty, lacks
        ``Drug_Like``, or ``Drug_Like`` is not boolean.
    """
    from math import cos, pi, sin

    if df.empty or 'Drug_Like' not in df.columns:
        return None, "Cannot plot: No analysis data or 'Drug_Like' column missing."
    if df['Drug_Like'].dtype != bool:
        return None, f"Cannot plot: 'Drug_Like' column must be boolean, but it is {df['Drug_Like'].dtype}."

    # Prepare data on a copy so the caller's frame does not silently gain a
    # 'Category' column.
    df = df.copy()
    df['Category'] = df['Drug_Like'].map({True: 'Drug-Like', False: 'Non-Drug-Like'})
    source = ColumnDataSource(df)

    # Teal for drug-like, coral for non-drug-like
    colors = ['#00D4AA', '#FF6B6B']
    color_mapper = factor_cmap('Category', palette=colors, factors=["Drug-Like", "Non-Drug-Like"])

    hover = HoverTool(tooltips=[
        ("Compound", "@Molecule"),
        ("MW", "@MW{0.0} Da"),
        ("LogP", "@LogP{0.00}"),
        ("HBD", "@HBD"),
        ("HBA", "@HBA"),
        ("TPSA", "@TPSA{0.0} Å²"),
        ("Category", "@Category"),
    ])

    # Common plot configuration - responsive plots with no background fill
    plot_config = {
        'tools': [hover, 'pan,wheel_zoom,box_zoom,reset,save'],
        'sizing_mode': 'scale_width',
        'background_fill_color': None,
        'border_fill_color': None,
        'outline_line_color': '#333333',
        'min_border_left': 50,
        'min_border_right': 50,
        'min_border_top': 50,
        'min_border_bottom': 50,
    }

    def style_plot(p, x_label, y_label, title):
        """Apply the shared dark-theme styling (title, axes, grid, legend) to a plot."""
        p.title.text = title
        p.title.text_color = '#FFFFFF'
        p.title.text_font_size = '14pt'
        p.title.text_font_style = 'bold'
        p.xaxis.axis_label = x_label
        p.yaxis.axis_label = y_label
        p.axis.axis_label_text_color = '#CCCCCC'
        p.axis.axis_label_text_font_size = '11pt'
        p.axis.major_label_text_color = '#AAAAAA'
        p.axis.major_label_text_font_size = '10pt'
        p.grid.grid_line_color = '#2A2A2A'
        p.grid.grid_line_alpha = 0.3
        if p.legend:
            p.legend.location = "top_right"
            p.legend.background_fill_color = '#1A1A1A'
            p.legend.background_fill_alpha = 0.8
            p.legend.border_line_color = '#444444'
            p.legend.label_text_color = '#FFFFFF'
            p.legend.label_text_font_size = '10pt'
            p.legend.click_policy = "mute"
            p.legend.glyph_height = 15
            p.legend.spacing = 5
        return p

    def scatter_by_category(p, x_col, y_col):
        """Draw the per-compound scatter, coloured and legend-grouped by category."""
        p.scatter(x_col, y_col, source=source, legend_group='Category',
                  color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)

    def add_guideline(p, xs, ys, label):
        """Draw one dashed gold threshold line with its own legend entry."""
        p.line(xs, ys, line_dash="dashed", line_color="#FFD700",
               line_width=2, alpha=0.7, legend_label=label)

    # Plot 1: MW vs LogP with Lipinski guidelines
    p1 = figure(title="Molecular Weight vs LogP", **plot_config)
    scatter_by_category(p1, 'MW', 'LogP')
    add_guideline(p1, [500, 500], [df['LogP'].min() - 0.5, df['LogP'].max() + 0.5], "MW ≤ 500")
    add_guideline(p1, [df['MW'].min() - 50, df['MW'].max() + 50], [5, 5], "LogP ≤ 5")
    style_plot(p1, "Molecular Weight (Da)", "LogP", "Lipinski Rule: MW vs LogP")

    # Plot 2: HBD vs HBA with Lipinski guidelines
    p2 = figure(title="Hydrogen Bonding Profile", **plot_config)
    scatter_by_category(p2, 'HBD', 'HBA')
    add_guideline(p2, [5, 5], [df['HBA'].min() - 1, df['HBA'].max() + 1], "HBD ≤ 5")
    add_guideline(p2, [df['HBD'].min() - 1, df['HBD'].max() + 1], [10, 10], "HBA ≤ 10")
    style_plot(p2, "Hydrogen Bond Donors", "Hydrogen Bond Acceptors", "Lipinski Rule: Hydrogen Bonding")

    # Plot 3: TPSA vs Rotatable Bonds with permeability guidelines
    p3 = figure(title="Molecular Flexibility & Polarity", **plot_config)
    scatter_by_category(p3, 'TPSA', 'RotBonds')
    add_guideline(p3, [140, 140], [df['RotBonds'].min() - 1, df['RotBonds'].max() + 1], "TPSA ≤ 140")
    add_guideline(p3, [df['TPSA'].min() - 10, df['TPSA'].max() + 10], [10, 10], "RotBonds ≤ 10")
    style_plot(p3, "Topological Polar Surface Area (Å²)", "Rotatable Bonds", "Drug Permeability Indicators")

    # Plot 4: Donut chart. No built-in tools are requested here; the only hover
    # tool is the custom one added below, so the wedge shows a single tooltip
    # instead of a generic one stacked on top of the custom one.
    p4_config = {**plot_config, 'tools': "", 'x_range': (-1.0, 1.0), 'y_range': (-1.0, 1.0)}
    p4 = figure(title="Drug-Likeness Distribution", **p4_config)

    counts = df['Category'].value_counts()
    total = counts.sum()
    angles = counts.values / total * 2 * pi
    # Each wedge starts where the previous one ended. Building the columns as
    # float arrays avoids writing floats into integer columns cell by cell.
    end_angles = np.cumsum(angles)
    start_angles = np.concatenate(([0.0], end_angles[:-1]))
    data = pd.DataFrame({
        'category': counts.index,
        'value': counts.values,
        'percentage': (counts.values / total * 100).round(1),
        'angle': angles,
        'color': [colors[0] if cat == 'Drug-Like' else colors[1] for cat in counts.index],
        'start_angle': start_angles,
        'end_angle': end_angles,
    })
    donut_source = ColumnDataSource(data)

    # Outer ring, sized to stay inside the fixed (-1, 1) ranges
    wedges = p4.annular_wedge(x=0, y=0, inner_radius=0.25, outer_radius=0.45,
                              start_angle='start_angle', end_angle='end_angle',
                              line_color="white", line_width=3, fill_color='color',
                              legend_field='category', source=donut_source)

    # Percentage label at the angular midpoint of each slice, mid-way through the ring
    text_radius = 0.35
    for start, end, percentage in zip(data['start_angle'], data['end_angle'], data['percentage']):
        mid_angle = (start + end) / 2
        p4.text([text_radius * cos(mid_angle)], [text_radius * sin(mid_angle)],
                text=[f"{percentage:.1f}%"],
                text_align="center", text_baseline="middle",
                text_color="white", text_font_size="11pt", text_font_style="bold")

    # Compound count in the hole of the donut
    p4.text([0], [0], text=[f"{len(df)}\nCompounds"],
            text_align="center", text_baseline="middle",
            text_color="white", text_font_size="14pt", text_font_style="bold")

    # Hover only on the wedges; the text glyphs carry none of these fields.
    p4.add_tools(HoverTool(renderers=[wedges],
                           tooltips=[("Category", "@category"),
                                     ("Count", "@value"),
                                     ("Percentage", "@percentage{0.0}%")]))
    style_plot(p4, "", "", "Compound Classification")
    p4.axis.visible = False
    p4.grid.visible = False

    # Responsive grid layout with one merged toolbar
    grid = gridplot([[p1, p2], [p3, p4]], sizing_mode='scale_width',
                    toolbar_location='right', merge_tools=True)
    return grid, "✅ Generated enhanced molecular properties dashboard."
| # ===== Phase 2 Functions ===== | |
def get_phase2_molecules():
    """Return the Phase 2 screening library as a name -> SMILES dictionary."""
    library = (
        ('Paracetamol', 'CC(=O)Nc1ccc(O)cc1'),
        ('Ibuprofen', 'CC(C)Cc1ccc(C(C)C(=O)O)cc1'),
        ('Aspirin', 'CC(=O)Oc1ccccc1C(=O)O'),
        ('Naproxen', 'C[C@H](C(=O)O)c1ccc2cc(OC)ccc2c1'),
        ('Diazepam', 'CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc12'),
        ('Metformin', 'CN(C)C(=N)N=C(N)N'),
        ('Loratadine', 'CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1'),
        ('Morphine', 'C[N@]1CC[C@]23c4c5ccc(O)c4O[C@H]2[C@@H](O)C=C[C@H]3[C@H]1C5'),
        ('Cetirizine', 'O=C(O)COCCOc1ccc(cc1)C(c1ccccc1)N1CCN(CC1)CCO'),
        ('Fluoxetine', 'CNCCC(c1ccccc1)Oc1ccc(C(F)(F)F)cc1'),
        ('Amoxicillin', 'C[C@@]1([C@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](N)c3ccc(O)cc3)C(=O)O)C'),
        ('Atorvastatin', 'CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1'),
        ('Ciprofloxacin', 'O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc12'),
        ('Warfarin', 'O=C(c1ccccc1)C(c1oc2ccccc2c1=O)C'),
        ('Furosemide', 'O=C(O)c1cc(Cl)c(NC2CO2)c(c1)S(=O)(=O)N'),
    )
    return dict(library)
def simulate_virtual_screening(smiles_dict: dict):
    """Simulate a virtual screen by assigning each molecule a pseudo-random affinity.

    Scores are drawn uniformly from [2.0, 9.8) with a fixed seed, so the same
    input always yields the same ranking. They are NOT computed from the
    structures.

    Args:
        smiles_dict: Mapping of molecule name -> SMILES string.

    Returns:
        ``(df, log)`` where ``df`` has the columns Molecule, SMILES,
        Predicted_Binding_Affinity and Ranking, sorted best-first. An empty
        input yields an empty frame with those columns.
    """
    if not smiles_dict:
        empty = pd.DataFrame(columns=['Molecule', 'SMILES', 'Predicted_Binding_Affinity', 'Ranking'])
        return empty, "✅ Simulated virtual screening for 0 molecules.\n"

    # A private RandomState gives the same numbers as seeding the global
    # generator with 42, without resetting random state for the rest of the app.
    rng = np.random.RandomState(42)
    scores = rng.uniform(2.0, 9.8, len(smiles_dict))
    results = [
        {'Molecule': name, 'SMILES': smiles, 'Predicted_Binding_Affinity': round(score, 2)}
        for (name, smiles), score in zip(smiles_dict.items(), scores)
    ]
    df = (
        pd.DataFrame(results)
        .sort_values('Predicted_Binding_Affinity', ascending=False)
        .reset_index(drop=True)
    )
    df['Ranking'] = df.index + 1
    return df, f"✅ Simulated virtual screening for {len(df)} molecules.\n"
def predict_admet_properties(smiles_dict: dict):
    """Build a mock ADMET table for the given molecules.

    MW, LogP, HBD, HBA and the Lipinski violation count are computed with
    RDKit. Solubility and toxicity risk are seeded pseudo-random placeholders,
    not model predictions.

    Args:
        smiles_dict: Mapping of molecule name -> SMILES string.

    Returns:
        ``(df, log)``; molecules whose SMILES cannot be parsed are left out of
        ``df`` and reported in ``log``.
    """
    admet_data = []
    log = ""
    for i, (name, smiles) in enumerate(smiles_dict.items()):
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            log += f"⚠️ Invalid SMILES string skipped for {name}: {smiles}\n"
            continue
        mw = Descriptors.MolWt(mol)
        logp = Descriptors.MolLogP(mol)
        hbd = Descriptors.NumHDonors(mol)
        hba = Descriptors.NumHAcceptors(mol)
        # Seed by position so each molecule keeps its own reproducible values;
        # a private generator leaves the global NumPy RNG state untouched.
        rng = np.random.RandomState(42 + i)
        solubility = round(rng.uniform(-4, -1), 2)   # drawn first
        toxicity = round(rng.uniform(0.05, 0.4), 3)  # drawn second
        admet_data.append({
            'Molecule': name,
            'MW': round(mw, 2),
            'LogP': round(logp, 2),
            'HBD': hbd,
            'HBA': hba,
            'Solubility (logS)': solubility,
            'Toxicity Risk': toxicity,
            'Lipinski Violations': sum([mw > 500, logp > 5, hbd > 5, hba > 10]),
        })
    df = pd.DataFrame(admet_data)
    log += f"✅ Predicted ADMET properties for {len(df)} molecules.\n"
    return df, log
| # --- MODIFIED FUNCTION --- | |
| # This is the updated function to correctly render 2D molecules on a dark background. | |
# Literal black -> white substitutions applied, in order, to RDKit's SVG output.
# Three-digit "#000" is deliberately NOT replaced here inside style strings:
# an unanchored "fill:#000" also matches the start of "fill:#0000FF" (blue)
# and corrupts it. The word-boundary regex in the helper covers that case.
_DARK_THEME_SVG_REPLACEMENTS = (
    ('stroke="black"', 'stroke="white"'),
    ('fill="black"', 'fill="white"'),
    ('stroke="#000000"', 'stroke="#FFFFFF"'),
    ('fill="#000000"', 'fill="#FFFFFF"'),
    ('stroke="#000"', 'stroke="#FFF"'),
    ('fill="#000"', 'fill="#FFF"'),
    ('stroke:black', 'stroke:white'),
    ('fill:black', 'fill:white'),
    ('stroke:#000000', 'stroke:#FFFFFF'),
    ('fill:#000000', 'fill:#FFFFFF'),
    ('stroke="rgb(0,0,0)"', 'stroke="rgb(255,255,255)"'),
    ('fill="rgb(0,0,0)"', 'fill="rgb(255,255,255)"'),
    ('stroke:rgb(0,0,0)', 'stroke:rgb(255,255,255)'),
    ('fill:rgb(0,0,0)', 'fill:rgb(255,255,255)'),
    ('color="black"', 'color="white"'),
    ('color:#000000', 'color:#FFFFFF'),
)


def _recolor_svg_for_dark_theme(svg: str) -> str:
    """Rewrite every black stroke/fill/text colour in an SVG string to white."""
    for old, new in _DARK_THEME_SVG_REPLACEMENTS:
        svg = svg.replace(old, new)

    # Force a white fill on <text> elements: move an existing fill attribute
    # to the end as white, then append one where the tag does not end in it.
    svg = re.sub(r'<text([^>]*?)\s+fill="[^"]*"([^>]*?)>', r'<text\1\2 fill="white">', svg)
    svg = re.sub(r'<text([^>]*?)(?<!fill="white")>', r'<text\1 fill="white">', svg)
    # Black fills declared inside a style="..." attribute of a <text> element
    svg = re.sub(r'<text([^>]*?)style="([^"]*?)fill:\s*(?:black|#000000|#000|rgb\(0,0,0\))([^"]*?)"([^>]*?)>',
                 r'<text\1style="\2fill:white\3"\4>', svg)
    # <text> elements that still carry no fill attribute at all
    svg = re.sub(r'<text(?![^>]*fill=)([^>]*?)>', r'<text fill="white"\1>', svg)
    # Collapse duplicates the steps above may have produced
    svg = re.sub(r'fill="white"\s+fill="white"', 'fill="white"', svg)

    # Final catch-all for any remaining spelling of black. The \b after #000
    # keeps six-digit colours that merely start with 000 (e.g. #0000FF) intact.
    svg = re.sub(r'\bblack\b', 'white', svg)
    svg = re.sub(r'#000000', '#FFFFFF', svg)
    svg = re.sub(r'#000\b', '#FFF', svg)
    svg = re.sub(r'rgb\(0,\s*0,\s*0\)', 'rgb(255,255,255)', svg)
    return svg


def visualize_molecule_2d_3d(smiles: str, name: str):
    """Generates a side-by-side 2D SVG and 3D py3Dmol HTML view for a single molecule.

    Args:
        smiles: SMILES string of the molecule.
        name: Display name; HTML-escaped before being placed in the markup.

    Returns:
        ``(html, log)``. On invalid SMILES or any rendering error, ``html`` is
        a short ``<p>`` message and ``log`` describes the failure. If no 3D
        conformer can be embedded, the 2D panel is still returned and the 3D
        panel shows a notice.
    """
    from html import escape as _escape_html

    # `name` originates from user input and ends up inside raw HTML.
    safe_name = _escape_html(str(name))
    log = ""
    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return f"<p>Invalid SMILES for {safe_name}</p>", f"❌ Invalid SMILES for {name}"

        drawer = Draw.rdMolDraw2D.MolDraw2DSVG(400, 300)
        # Dark-theme drawing options
        drawer.drawOptions().clearBackground = False
        drawer.drawOptions().addStereoAnnotation = True
        drawer.drawOptions().baseFontSize = 0.8
        drawer.drawOptions().circleAtoms = False
        drawer.drawOptions().highlightColour = (1, 0.5, 0)  # Orange for highlights
        drawer.drawOptions().backgroundColour = (0.11, 0.11, 0.11)  # Dark background
        drawer.drawOptions().symbolColour = (1, 1, 1)  # White symbols
        drawer.drawOptions().defaultColour = (1, 1, 1)  # White default color
        try:
            # Best effort: may not be settable on every RDKit version.
            drawer.drawOptions().annotationColour = (1, 1, 1)  # White annotations
        except Exception:
            pass
        drawer.DrawMolecule(mol)
        drawer.FinishDrawing()
        svg_2d = _recolor_svg_for_dark_theme(drawer.GetDrawingText().replace('svg:', ''))
        # Embed the SVG within a div with a dark background for consistency
        svg_2d = f'<div style="background-color: #1C1C1C; padding: 10px; border-radius: 5px;">{svg_2d}</div>'

        mol_3d = Chem.AddHs(mol)
        # EmbedMolecule returns -1 when no conformer could be generated;
        # optimising such a molecule would raise and discard the 2D view too.
        if AllChem.EmbedMolecule(mol_3d, randomSeed=42) == -1:
            html_3d = '<p style="color: #AAA;">3D conformer could not be generated.</p>'
            log += f"⚠️ Could not generate a 3D conformer for {name}; showing 2D structure only.\n"
        else:
            AllChem.MMFFOptimizeMolecule(mol_3d)
            sdf_data = Chem.MolToMolBlock(mol_3d)
            viewer = py3Dmol.view(width=400, height=300)
            viewer.setBackgroundColor('#1C1C1C')
            viewer.addModel(sdf_data, "sdf")
            viewer.setStyle({'stick': {}, 'sphere': {'scale': 0.25}})
            viewer.zoomTo()
            # NOTE(review): _make_html is a private py3Dmol method.
            html_3d = viewer._make_html()
            log += f"✅ Generated 2D/3D view for {name}.\n"

        combined_html = f"""
<div style="display: flex; flex-direction: row; align-items: center; justify-content: space-around; border: 1px solid #444; border-radius: 10px; padding: 10px; margin-bottom: 10px; background-color: #2b2b2b;">
<div style="text-align: center;">
<h4 style="color: white; font-family: 'Roboto', sans-serif;">{safe_name} (2D Structure)</h4>
{svg_2d}
</div>
<div style="text-align: center;">
<h4 style="color: white; font-family: 'Roboto', sans-serif;">{safe_name} (3D Interactive)</h4>
{html_3d}
</div>
</div>
"""
        return combined_html, log
    except Exception as e:
        # UI boundary: surface the failure as markup and log text instead of crashing.
        return (
            f"<p>Error visualizing {safe_name}: {_escape_html(str(e))}</p>",
            f"❌ Error visualizing {name}: {e}",
        )
def visualize_protein_ligand_interaction(pdb_data: str, pdb_id: str, ligand_resn='G39'):
    """Visualizes a protein-ligand binding site using py3Dmol.

    Args:
        pdb_data: PDB file content as text.
        pdb_id: PDB identifier, used only in the log message.
        ligand_resn: Residue name of the ligand to highlight and zoom to.

    Returns:
        ``(html, log)``; ``html`` is ``None`` when no PDB data was supplied or
        rendering raised.
    """
    if not pdb_data:
        return None, "Cannot generate view: No PDB data provided."
    try:
        ligand_sel = {'resn': ligand_resn}
        viewer = py3Dmol.view(width='100%', height=700)
        viewer.setBackgroundColor('#1C1C1C')
        viewer.addModel(pdb_data, "pdb")
        viewer.setStyle({'cartoon': {'color': 'spectrum', 'thickness': 0.8}})
        viewer.addSurface(py3Dmol.VDW, {'opacity': 0.2, 'color': 'lightblue'})
        # Ligand: green sticks + spheres
        viewer.addStyle(ligand_sel, {'stick': {'colorscheme': 'greenCarbon', 'radius': 0.3},
                                     'sphere': {'scale': 0.4, 'colorscheme': 'greenCarbon'}})
        # Anything within 4 Å of the ligand: thin orange sticks
        viewer.addStyle({'within': {'distance': 4, 'sel': ligand_sel}},
                        {'stick': {'colorscheme': 'orangeCarbon', 'radius': 0.2}})
        viewer.zoomTo(ligand_sel)
        # NOTE(review): _make_html is a private py3Dmol method.
        html = viewer._make_html()
        log = (f"✅ Generated protein-ligand interaction view for PDB {pdb_id}.\n"
               f"🟢 Green: Ligand ({ligand_resn})\n"
               f"🟠 Orange: Residues within 4Å of ligand\n")
        return html, log
    except Exception as e:
        # UI boundary: report the failure in the log instead of crashing the app.
        return None, f"❌ Protein-ligand visualization error: {e}"
| # ===== Phase 3 Functions ===== | |
def get_phase3_molecules():
    """Return the Phase 3 candidate compounds as a name -> SMILES dictionary."""
    candidates = (
        ('Oseltamivir', 'CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C'),
        ('Aspirin', 'CC(=O)OC1=CC=CC=C1C(=O)O'),
        ('Remdesivir', 'CCC(CC)COC(=O)[C@@H](C)N[P@](=O)(OC[C@@H]1O[C@](C#N)([C@H]([C@@H]1O)O)C2=CC=C3N2N=CN=C3N)OC4=CC=CC=C4'),
        ('Penicillin G', 'CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C'),
    )
    return dict(candidates)
def calculate_comprehensive_properties(smiles_dict: dict):
    """Compute an extended RDKit property table with a Lipinski verdict per compound.

    Args:
        smiles_dict: Mapping of compound name -> SMILES string.

    Returns:
        ``(df, log)``. ``df`` has the columns Compound, Molecular_Weight, LogP,
        HBD, HBA, TPSA, Rotatable_Bonds, Aromatic_Rings, Lipinski_Violations
        and Drug_Like ("✅ Yes" when at most one rule is violated, else
        "❌ No"), rounded to two decimals. Unparsable SMILES are left out of
        ``df`` and reported in ``log``.
    """
    analysis = []
    log = ""
    for name, smiles in smiles_dict.items():
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            log += f"⚠️ Invalid SMILES string skipped for {name}: {smiles}\n"
            continue
        mw = Descriptors.MolWt(mol)
        logp = Descriptors.MolLogP(mol)
        hbd = Descriptors.NumHDonors(mol)
        hba = Descriptors.NumHAcceptors(mol)
        violations = sum([mw > 500, logp > 5, hbd > 5, hba > 10])
        analysis.append({
            'Compound': name,
            'Molecular_Weight': mw,
            'LogP': logp,
            'HBD': hbd,
            'HBA': hba,
            'TPSA': Descriptors.TPSA(mol),
            'Rotatable_Bonds': Descriptors.NumRotatableBonds(mol),
            'Aromatic_Rings': Descriptors.NumAromaticRings(mol),
            'Lipinski_Violations': violations,
            'Drug_Like': '✅ Yes' if violations <= 1 else '❌ No',
        })
    df = pd.DataFrame(analysis).round(2)
    log += f"✅ Calculated comprehensive properties for {len(df)} compounds.\n"
    return df, log
def predict_toxicity(properties_df: pd.DataFrame):
    """Score compounds with a random forest trained on synthetic data.

    The classifier is fitted on every call on 500 simulated compounds whose
    "toxic" label is derived from MW > 550, |LogP| > 4.5 and noise. It is a
    demonstration, not a validated toxicity model.

    Args:
        properties_df: Table with the columns Compound, Molecular_Weight,
            LogP, TPSA, Rotatable_Bonds and Aromatic_Rings.

    Returns:
        ``(results_df, log)`` where ``results_df`` has the columns Compound,
        Toxicity_Probability and Predicted_Risk (LOW < 0.3 <= MODERATE < 0.7
        <= HIGH). An empty input yields an empty DataFrame.
    """
    if properties_df.empty:
        return pd.DataFrame(), "Cannot predict toxicity: No properties data."

    # Private generator: same stream as seeding the global RNG with 42, but
    # without resetting random state for the rest of the app. The draw order
    # below determines the training data, so keep it fixed.
    rng = np.random.RandomState(42)
    n_compounds = 500
    training_data = pd.DataFrame({
        'molecular_weight': rng.normal(400, 100, n_compounds),
        'logp': rng.normal(2.5, 1.5, n_compounds),
        'tpsa': rng.normal(80, 30, n_compounds),
        'rotatable_bonds': rng.randint(0, 15, n_compounds),
        'aromatic_rings': rng.randint(0, 5, n_compounds),
    })
    toxicity_score = (
        (training_data['molecular_weight'] > 550) * 0.4
        + (abs(training_data['logp']) > 4.5) * 0.4
        + rng.random_sample(n_compounds) * 0.2
    )
    training_data['toxic'] = (toxicity_score > 0.5).astype(int)

    features = ['molecular_weight', 'logp', 'tpsa', 'rotatable_bonds', 'aromatic_rings']
    rf_model = RandomForestClassifier(n_estimators=50, random_state=42)
    rf_model.fit(training_data[features], training_data['toxic'])

    # Copy before renaming so the column assignment never targets a view of
    # the caller's frame.
    X_pred = properties_df[['Molecular_Weight', 'LogP', 'TPSA', 'Rotatable_Bonds', 'Aromatic_Rings']].copy()
    X_pred.columns = features
    toxicity_prob = rf_model.predict_proba(X_pred)[:, 1]

    results_df = properties_df[['Compound']].copy()
    results_df['Toxicity_Probability'] = np.round(toxicity_prob, 3)
    results_df['Predicted_Risk'] = [
        "🟢 LOW" if p < 0.3 else "🟡 MODERATE" if p < 0.7 else "🔴 HIGH"
        for p in toxicity_prob
    ]
    return results_df, "✅ Predicted toxicity using a pre-trained simulation model.\n"
| # ===== Phase 4 Functions ===== | |
def get_regulatory_summary():
    """Return the static AI/ML documentation summary table and a status message.

    Returns:
        ``(df, log)`` where ``df`` has the columns Component and Description,
        one row per documentation component.
    """
    summary = {
        'Component': ['Data Governance', 'Model Architecture', 'Model Validation', 'Interpretability'],
        'Description': [
            'Data sourced from ChEMBL, PDB, GISAID. Bias assessed via geographic distribution analysis.',
            'Graph Convolutional Network (Target ID), Random Forest (ADMET), K-Means (Patient Stratification).',
            'ADMET Model validated with AUC-ROC > 0.85 on an independent test set.',
            'SHAP used for patient stratification model outputs.',
        ],
    }
    return pd.DataFrame(summary), "✅ Generated AI/ML documentation summary."
def simulate_rwd_analysis(adverse_event_text):
    """Combine simulated and user-entered adverse events and chart their frequencies.

    Args:
        adverse_event_text: Comma-separated adverse events entered by the user;
            entries are trimmed and lower-cased. ``None`` or an empty string
            means no user events.

    Returns:
        ``(events_df, fig, log)``: a table with the columns Event and Count
        (most frequent first), a matplotlib bar-chart figure, and a status
        message.
    """
    # 100 baseline reports drawn with a fixed seed from a private generator,
    # so the global NumPy RNG state is not reset as a side effect.
    rng = np.random.RandomState(42)
    base_events = list(rng.choice(
        ['headache', 'nausea', 'fatigue', 'dizziness', 'rash', 'fever'],
        100,
        p=[0.25, 0.2, 0.15, 0.15, 0.15, 0.1],
    ))
    user_events = [e.strip().lower() for e in (adverse_event_text or "").split(',') if e.strip()]
    all_events = base_events + user_events
    event_counts = pd.Series(all_events).value_counts()
    log = f"✅ Analyzed {len(all_events)} simulated adverse event reports.\n"

    plt.style.use('dark_background')
    fig_bar, ax_bar = plt.subplots(figsize=(10, 6))
    # Transparent backgrounds so the chart blends with the app theme
    fig_bar.patch.set_facecolor('none')
    ax_bar.set_facecolor('none')
    sns.barplot(x=event_counts.values, y=event_counts.index, palette='viridis', ax=ax_bar, orient='h')
    ax_bar.set_title('Simulated Adverse Event Frequencies')
    ax_bar.set_xlabel('Number of Reports')
    ax_bar.set_ylabel('Adverse Event')
    fig_bar.tight_layout()

    # Name both columns explicitly: the label value_counts() gives its result
    # differs between pandas versions, so renaming 'index'/0 is not reliable.
    events_df = event_counts.rename_axis('Event').reset_index(name='Count')
    return events_df, fig_bar, log
def get_ethical_framework():
    """Return the static four-pillar ethical framework table and a status message.

    Returns:
        ``(df, log)`` where ``df`` has the columns Pillar and Description.
    """
    framework = {
        'Pillar': [
            '1. Beneficence & Non-Maleficence',
            '2. Justice & Fairness',
            '3. Transparency & Explainability',
            '4. Accountability & Governance',
        ],
        'Description': [
            'AI should help patients and do no harm. Requires rigorous validation and safety monitoring.',
            'AI must not create or worsen health disparities. Requires bias detection and mitigation.',
            'Clinical decisions influenced by AI must be understandable. Requires interpretable models.',
            'Clear lines of responsibility for AI systems must be established. Requires human oversight.',
        ],
    }
    return pd.DataFrame(framework), "✅ Generated ethical framework summary."
| # --- 3. Streamlit Interface Definition --- | |
# Page header and introduction
st.title("🔬 AI-Powered Drug Discovery Pipeline")
st.markdown("""
Welcome to the AI Drug Discovery Pipeline Demonstrator. This application integrates the four major phases of drug development,
showcasing how AI and computational tools can accelerate the process from target identification to post-market surveillance.
Navigate through the tabs below to explore each phase.
""")

# Initialize session state for logs and results: one log string and one
# results dict per phase, created only if the key is not already present.
for _phase in range(1, 5):
    if f'log_p{_phase}' not in st.session_state:
        st.session_state[f'log_p{_phase}'] = f"Phase {_phase} logs will appear here."
    if f'results_p{_phase}' not in st.session_state:
        st.session_state[f'results_p{_phase}'] = {}

# One tab per pipeline phase
tab1, tab2, tab3, tab4 = st.tabs([
    "Phase 1: Discovery & Target ID",
    "Phase 2: Lead Generation & Optimization",
    "Phase 3: Preclinical Development",
    "Phase 4: Implementation & Post-Market"
])
# ===== TAB 1: DISCOVERY & TARGET IDENTIFICATION =====
# Flow: (1) a form collects a PDB ID, a protein ID and a "Name:SMILES" list;
# (2) on submit the Phase 1 helpers run and their outputs are stored in
# st.session_state.results_p1 / log_p1; (3) the log and the stored results are
# rendered from session state on every run of the script.
with tab1:
    st.header("𧬠Step 1: Target Identification and Initial Analysis")
    st.markdown("Fetch protein data from public databases and perform a high-level analysis of potential drug-like molecules.")

    with st.form(key="phase1_form"):
        st.subheader("Analysis Controls")
        col1, col2 = st.columns(2)
        with col1:
            pdb_id_input = st.text_input("Enter PDB ID", value="3B7E", key="p1_pdb")
            protein_id_input = st.text_input("Enter Protein ID (for FASTA)", value="ACF54602.1", key="p1_protein")
        with col2:
            default_molecules_p1 = create_sample_molecules()
            default_molecules_text_p1 = "\n".join(
                f"{name}:{smiles}" for name, smiles in default_molecules_p1.items()
            )
            molecules_input_p1 = st.text_area(
                "Molecules (Name:SMILES, one per line)",
                value=default_molecules_text_p1,
                height=150,
                key="p1_molecules"
            )
        run_phase1_btn = st.form_submit_button("π Run Phase 1 Analysis", use_container_width=True)

    if run_phase1_btn:
        full_log = "--- Starting Phase 1 Analysis ---\n"

        # Parse molecules from the text area. Only lines that yield both a
        # non-empty name and a non-empty SMILES are kept; everything else is
        # counted and reported instead of being passed downstream or dropped
        # silently.
        smiles_dict_p1 = {}
        skipped_lines_p1 = 0
        if molecules_input_p1.strip():
            for line in molecules_input_p1.strip().split('\n'):
                # Pasted text may contain non-breaking spaces.
                cleaned_line = line.replace('\xa0', ' ').strip()
                if not cleaned_line:
                    continue
                # Split on the first ':' only, so any later ':' stays in the SMILES part.
                name, separator, smiles = cleaned_line.partition(':')
                name, smiles = name.strip(), smiles.strip()
                if separator and name and smiles:
                    smiles_dict_p1[name] = smiles
                else:
                    skipped_lines_p1 += 1
            if smiles_dict_p1:
                full_log += f"β Successfully parsed {len(smiles_dict_p1)} molecules from input.\n"
            else:
                full_log += "β οΈ Could not parse any molecules. Please check the format (e.g., 'Aspirin:CC...').\n"
            if skipped_lines_p1:
                full_log += f"Skipped {skipped_lines_p1} line(s) that did not contain a non-empty 'Name:SMILES' pair.\n"
        else:
            full_log += "β οΈ Molecule input is empty. No analysis to perform.\n"

        if smiles_dict_p1:
            pdb_data, log_pdb_fetch = fetch_pdb_structure(pdb_id_input)
            full_log += log_pdb_fetch
            fasta_log = fetch_fasta_sequence(protein_id_input)
            full_log += fasta_log
            protein_view_html, log_3d_viz = visualize_protein_3d(pdb_data, pdb_id_input)
            full_log += log_3d_viz
            props_df, log_props = calculate_molecular_properties(smiles_dict_p1)
            full_log += log_props
            analysis_df, display_df, log_lipinski = assess_drug_likeness(props_df)
            full_log += log_lipinski
            props_plot, log_plot = plot_properties_dashboard(analysis_df)  # This now calls the Bokeh function
            full_log += log_plot

            # Fall back to an empty frame with the expected columns when the
            # assessment produced no rows.
            lipinski_cols = ['Molecule', 'MW', 'LogP', 'HBD', 'HBA', 'Lipinski_Violations', 'Drug_Like']
            lipinski_subset_df = display_df[lipinski_cols] if not display_df.empty else pd.DataFrame(columns=lipinski_cols)

            st.session_state.results_p1 = {
                'protein_view_html': protein_view_html,
                'fasta_log': fasta_log,
                'lipinski_subset_df': lipinski_subset_df,
                'props_df': props_df,
                'props_plot': props_plot
            }
        else:
            # Nothing to analyse: clear any results left over from an earlier run.
            st.session_state.results_p1 = {}

        full_log += "\n--- Phase 1 Analysis Complete ---"
        st.session_state.log_p1 = full_log

    st.text_area("Status & Logs", st.session_state.log_p1, height=200, key="log_p1_area")

    if st.session_state.results_p1:
        res1 = st.session_state.results_p1
        p1_tabs = st.tabs(["Analysis Plots", "Molecule Analysis", "Protein Information"])
        with p1_tabs[0]:
            st.subheader("Molecular Properties Dashboard")
            if res1.get('props_plot'):
                # Use st.bokeh_chart for Bokeh figures
                st.bokeh_chart(res1['props_plot'], use_container_width=True)
            else:
                st.warning("Could not generate plots. Please check the logs for more details.")
        with p1_tabs[1]:
            st.subheader("Drug-Likeness Assessment (Lipinski's Rule of Five)")
            st.dataframe(res1.get('lipinski_subset_df', pd.DataFrame()), use_container_width=True, hide_index=True)
            st.subheader("Calculated Molecular Properties")
            st.dataframe(res1.get('props_df', pd.DataFrame()), use_container_width=True, hide_index=True)
        with p1_tabs[2]:
            st.subheader("Protein 3D Structure (Interactive)")
            if res1.get('protein_view_html'):
                st.components.v1.html(res1['protein_view_html'], height=600, scrolling=False)
            st.subheader("FASTA Sequence Information")
            # Widgets should not have an empty label; give it a real one and
            # hide it, since the subheader above already titles this area.
            st.text_area(
                "FASTA sequence information",
                res1.get('fasta_log', 'No data'),
                height=200,
                key="fasta_info_area",
                label_visibility="collapsed",
            )
# ===== TAB 2: LEAD GENERATION & OPTIMIZATION =====
# Flow: (1) a form collects a PDB ID, a ligand residue name and a
# "Name:SMILES" list; (2) on submit the Phase 2 helpers run and their outputs
# are stored in st.session_state.results_p2 / log_p2; (3) the log and the
# stored results are rendered from session state on every run of the script.
with tab2:
    st.header("π Step 2: Virtual Screening and ADMET Prediction")
    st.markdown("Screen candidate molecules against the target, predict their ADMET properties, and visualize the top candidates.")

    with st.form(key="phase2_form"):
        st.subheader("Analysis Controls")
        col1, col2 = st.columns(2)
        with col1:
            phase2_pdb_id_input = st.text_input("Enter PDB ID for Interaction View", value="3B7E", key="p2_pdb")
            phase2_ligand_resn = st.text_input("Ligand Residue Name (in PDB)", value="G39", key="p2_ligand")
        with col2:
            default_molecules_dict = get_phase2_molecules()
            default_molecules_text = "\n".join(
                f"{name}:{smiles}" for name, smiles in default_molecules_dict.items()
            )
            molecules_input = st.text_area(
                "Molecules (Name:SMILES, one per line)",
                value=default_molecules_text,
                height=250,
                key="p2_molecules"
            )
        run_phase2_btn = st.form_submit_button("π Run Phase 2 Analysis", use_container_width=True)

    if run_phase2_btn:
        full_log = "--- Starting Phase 2 Analysis ---\n"

        # Parse molecules from the text area. Only lines that yield both a
        # non-empty name and a non-empty SMILES are kept; everything else is
        # counted and reported instead of being passed downstream or dropped
        # silently.
        smiles_dict = {}
        skipped_lines = 0
        if molecules_input.strip():
            for line in molecules_input.strip().split('\n'):
                # Pasted text may contain non-breaking spaces.
                cleaned_line = line.replace('\xa0', ' ').strip()
                if not cleaned_line:
                    continue
                # Split on the first ':' only, so any later ':' stays in the SMILES part.
                name, separator, smiles = cleaned_line.partition(':')
                name, smiles = name.strip(), smiles.strip()
                if separator and name and smiles:
                    smiles_dict[name] = smiles
                else:
                    skipped_lines += 1
            if smiles_dict:
                full_log += f"β Successfully parsed {len(smiles_dict)} molecules from input.\n"
            else:
                full_log += "β οΈ Could not parse any molecules. Please check the format (e.g., 'Aspirin:CC(=O)OC1=CC=CC=C1C(=O)O').\n"
            if skipped_lines:
                full_log += f"Skipped {skipped_lines} line(s) that did not contain a non-empty 'Name:SMILES' pair.\n"
        else:
            full_log += "β οΈ Molecule input is empty. No analysis to perform.\n"

        if smiles_dict:
            screening_df, log_screening = simulate_virtual_screening(smiles_dict)
            full_log += log_screening
            admet_df, log_admet = predict_admet_properties(smiles_dict)
            full_log += log_admet

            # Concatenate one HTML block per molecule into a single document
            # that is later embedded as one component.
            combined_viz_html = ""
            log_viz = ""
            for name, smiles in smiles_dict.items():
                html_block, log_mol_viz = visualize_molecule_2d_3d(smiles, name)
                combined_viz_html += html_block
                log_viz += log_mol_viz
            full_log += log_viz

            pdb_data, log_pdb_fetch_2 = fetch_pdb_structure(phase2_pdb_id_input)
            full_log += log_pdb_fetch_2
            interaction_html, log_interaction = visualize_protein_ligand_interaction(pdb_data, phase2_pdb_id_input, phase2_ligand_resn)
            full_log += log_interaction

            st.session_state.results_p2 = {
                'screening_df': screening_df,
                'admet_df': admet_df,
                'combined_viz_html': combined_viz_html,
                'interaction_html': interaction_html,
                'molecules_used': smiles_dict
            }
        else:
            # Nothing to analyse: clear any results left over from an earlier run.
            st.session_state.results_p2 = {}

        full_log += "\n--- Phase 2 Analysis Complete ---"
        st.session_state.log_p2 = full_log

    st.text_area("Status & Logs", st.session_state.log_p2, height=200, key="log_p2_area")

    if st.session_state.results_p2:
        res2 = st.session_state.results_p2
        p2_tabs = st.tabs(["Virtual Screening & ADMET", "Molecule Visualization (2D & 3D)", "Protein-Ligand Interaction"])
        with p2_tabs[0]:
            col1, col2 = st.columns(2)
            with col1:
                st.subheader("Virtual Screening Results (Simulated)")
                st.dataframe(res2.get('screening_df', pd.DataFrame()), use_container_width=True, hide_index=True)
            with col2:
                st.subheader("ADMET Properties Prediction")
                st.dataframe(res2.get('admet_df', pd.DataFrame()), use_container_width=True, hide_index=True)
        with p2_tabs[1]:
            molecules_used = res2.get('molecules_used', {})
            if molecules_used:
                st.subheader(f"Interactive 2D and 3D views of {len(molecules_used)} candidate molecules")
                st.info(f"Currently visualizing: {', '.join(molecules_used.keys())}")
            else:
                st.subheader("Interactive 2D and 3D views of candidate molecules")
            if res2.get('combined_viz_html'):
                # Component height grows with the number of molecules shown.
                st.components.v1.html(res2.get('combined_viz_html'), height=len(molecules_used) * 400 + 100, scrolling=True)
            else:
                st.warning("No molecule visualizations available. Please run the analysis first.")
        with p2_tabs[2]:
            st.subheader("Detailed view of the top candidate binding to the protein.")
            if res2.get('interaction_html'):
                st.components.v1.html(res2.get('interaction_html'), height=700, scrolling=False)
            else:
                st.warning("No protein-ligand interaction view available. Please run the analysis first.")
# ===== TAB 3: PRECLINICAL DEVELOPMENT =====
# A single-button form triggers the Phase 3 helpers; their outputs are kept in
# st.session_state (log_p3 / results_p3) and rendered from there.
with tab3:
    st.header("π§ͺ Step 3: In-Depth Candidate Analysis and Toxicity Prediction")
    st.markdown("Perform a comprehensive analysis of the most promising lead compounds and use a simulated AI model to predict toxicity risk.")

    with st.form(key="phase3_form"):
        st.subheader("Analysis Controls")
        run_phase3_btn = st.form_submit_button("π Run Phase 3 Analysis", use_container_width=True)

    if run_phase3_btn:
        phase3_molecules = get_phase3_molecules()
        comp_props_df, props_log = calculate_comprehensive_properties(phase3_molecules)
        tox_df, tox_log = predict_toxicity(comp_props_df)

        # Gather one HTML fragment and one log fragment per molecule, then
        # stitch each group together in iteration order.
        viz_fragments = []
        viz_log_fragments = []
        for mol_name, mol_smiles in phase3_molecules.items():
            fragment, fragment_log = visualize_molecule_2d_3d(mol_smiles, mol_name)
            viz_fragments.append(fragment)
            viz_log_fragments.append(fragment_log)

        log_sections = [
            "--- Starting Phase 3 Analysis ---\n",
            props_log,
            tox_log,
            "".join(viz_log_fragments),
            "\n--- Phase 3 Analysis Complete ---",
        ]
        st.session_state.log_p3 = "".join(log_sections)
        st.session_state.results_p3 = {
            'comp_props_df': comp_props_df,
            'tox_df': tox_df,
            'combined_viz_html': "".join(viz_fragments),
        }

    st.text_area("Status & Logs", st.session_state.log_p3, height=200, key="log_p3_area")

    res3 = st.session_state.results_p3
    if res3:
        properties_tab, gallery_tab = st.tabs(["Comprehensive Properties & Toxicity", "Molecule Visualization (3D Gallery)"])
        with properties_tab:
            st.subheader("Comprehensive Molecular Properties & AI-Powered Toxicity Prediction (Simulated)")
            left_col, right_col = st.columns(2)
            with left_col:
                st.dataframe(res3.get('comp_props_df', pd.DataFrame()), use_container_width=True, hide_index=True)
            with right_col:
                st.dataframe(res3.get('tox_df', pd.DataFrame()), use_container_width=True, hide_index=True)
        with gallery_tab:
            st.subheader("Interactive 3D gallery of the compounds under analysis.")
            gallery_html = res3.get('combined_viz_html')
            if gallery_html:
                st.components.v1.html(gallery_html, height=1000, scrolling=True)
# ===== TAB 4: POST-MARKET SURVEILLANCE =====
# The form takes a comma-separated list of adverse events; on submit the
# Phase 4 helpers run and their outputs are kept in st.session_state
# (log_p4 / results_p4) and rendered from there.
with tab4:
    st.header("π Step 4: Regulatory Submission and Pharmacovigilance")
    st.markdown("Explore summaries of the documentation needed for regulatory approval and simulate how AI can monitor real-world data for adverse events.")

    with st.form(key="phase4_form"):
        st.subheader("Analysis Controls")
        rwd_input = st.text_area(
            "Enter new adverse events (comma-separated)",
            value="severe allergic reaction, joint pain, severe allergic reaction",
            height=100,
            key="p4_rwd",
        )
        run_phase4_btn = st.form_submit_button("π Run Phase 4 Analysis", use_container_width=True)

    if run_phase4_btn:
        reg_df, regulatory_log = get_regulatory_summary()
        eth_df, ethics_log = get_ethical_framework()
        rwd_df, plot_bar, surveillance_log = simulate_rwd_analysis(rwd_input)

        log_sections = [
            "--- Starting Phase 4 Analysis ---\n",
            regulatory_log,
            ethics_log,
            surveillance_log,
            "\n--- Phase 4 Analysis Complete ---",
        ]
        st.session_state.log_p4 = "".join(log_sections)
        st.session_state.results_p4 = {
            'rwd_df': rwd_df,
            'plot_bar': plot_bar,
            'reg_df': reg_df,
            'eth_df': eth_df,
        }

    st.text_area("Status & Logs", st.session_state.log_p4, height=200, key="log_p4_area")

    res4 = st.session_state.results_p4
    if res4:
        surveillance_tab, frameworks_tab = st.tabs(["Pharmacovigilance Analysis", "Regulatory & Ethical Frameworks"])
        with surveillance_tab:
            st.subheader("Simulated Adverse Event Analysis")
            if res4.get('plot_bar'):
                st.pyplot(res4['plot_bar'])
            st.dataframe(res4.get('rwd_df', pd.DataFrame()), use_container_width=True, hide_index=True)
        with frameworks_tab:
            left_col, right_col = st.columns(2)
            with left_col:
                st.subheader("AI/ML Documentation Summary for Submission")
                st.dataframe(res4.get('reg_df', pd.DataFrame()), use_container_width=True, hide_index=True)
            with right_col:
                st.subheader("Ethical Framework for AI in Healthcare")
                st.dataframe(res4.get('eth_df', pd.DataFrame()), use_container_width=True, hide_index=True)