alidenewade committed on
Commit
430eb42
·
verified ·
1 Parent(s): 7198ea6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +453 -142
app.py CHANGED
@@ -20,7 +20,6 @@ from sklearn.model_selection import train_test_split
20
 
21
  # 3D Visualization
22
  import py3Dmol
23
- from stmol import showmol # Import the new component
24
 
25
  # Bokeh plotting
26
  from bokeh.plotting import figure
@@ -150,28 +149,29 @@ def fetch_fasta_sequence(protein_id: str):
150
  log += f"❌ An error occurred while fetching FASTA data: {e}\n"
151
  return log
152
 
153
- # REFACTORED: This function now returns a py3Dmol viewer object, not HTML
154
- def visualize_protein_3d(pdb_data: str):
155
  """
156
  Generates an interactive 3D protein visualization using py3Dmol.
157
  """
158
  if not pdb_data:
159
  return None, "Cannot generate 3D view: No PDB data provided."
160
  try:
161
- viewer = py3Dmol.view(width=700, height=600)
162
  viewer.setBackgroundColor('#1C1C1C')
163
  viewer.addModel(pdb_data, "pdb")
164
  viewer.setStyle({'cartoon': {'color': 'spectrum', 'thickness': 0.8}})
165
  viewer.addSurface(py3Dmol.VDW, {'opacity': 0.3, 'color': 'lightblue'})
166
  viewer.zoomTo()
167
- log = f"βœ… Generated 3D visualization object.\n"
168
- return viewer, log
 
169
  except Exception as e:
170
  return None, f"❌ 3D visualization error: {e}"
171
 
172
  def create_sample_molecules():
173
  """
174
  Returns a dictionary of sample molecules in Name:SMILES format.
 
175
  """
176
  return {
177
  "Oseltamivir (Influenza)": "CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C",
@@ -181,9 +181,9 @@ def create_sample_molecules():
181
  "Atorvastatin (Cholesterol)": "CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1", # Lipitor
182
  "Metformin (Diabetes)": "CN(C)C(=N)N=C(N)N",
183
  "Loratadine (Antihistamine)": "CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1",
184
- "Imatinib (Gleevec - Cancer)": "Cc1ccc(NC(=O)c2cnc(C)s2)cc1-c1cnc(Nc2ccc(CN)cc2)nc1",
185
- "Amlodipine (Hypertension)": "CCC(COC(=O)c1cnc(C)c(c1C)C(=O)OC)c1ccc(Cl)cc1",
186
- "Rosuvastatin (Cholesterol)": "CC(C)c1ccc(cc1)S(=O)(=O)Nc1ncc(C)c(C(=O)O[C@H](C)[C@H](O)CC(=O)O)c1C",
187
  }
188
 
189
  def calculate_molecular_properties(smiles_dict: dict):
@@ -196,9 +196,13 @@ def calculate_molecular_properties(smiles_dict: dict):
196
  mol = Chem.MolFromSmiles(smiles)
197
  if mol:
198
  props = {
199
- 'Molecule': name, 'SMILES': smiles, 'MW': Descriptors.MolWt(mol),
200
- 'LogP': Descriptors.MolLogP(mol), 'HBD': Descriptors.NumHDonors(mol),
201
- 'HBA': Descriptors.NumHAcceptors(mol), 'TPSA': Descriptors.TPSA(mol),
 
 
 
 
202
  'RotBonds': Descriptors.NumRotatableBonds(mol),
203
  }
204
  properties.append(props)
@@ -212,6 +216,7 @@ def calculate_molecular_properties(smiles_dict: dict):
212
  def assess_drug_likeness(df: pd.DataFrame):
213
  """
214
  Assesses drug-likeness based on Lipinski's Rule of Five.
 
215
  """
216
  if df.empty:
217
  return pd.DataFrame(), pd.DataFrame(), "Cannot assess drug-likeness: No properties data."
@@ -222,6 +227,7 @@ def assess_drug_likeness(df: pd.DataFrame):
222
  analysis_df['HBD_OK'] = analysis_df['HBD'] <= 5
223
  analysis_df['HBA_OK'] = analysis_df['HBA'] <= 10
224
  analysis_df['Lipinski_Violations'] = (~analysis_df[['MW_OK', 'LogP_OK', 'HBD_OK', 'HBA_OK']]).sum(axis=1)
 
225
  analysis_df['Drug_Like'] = analysis_df['Lipinski_Violations'] <= 1
226
 
227
  display_df = df.copy()
@@ -229,10 +235,13 @@ def assess_drug_likeness(df: pd.DataFrame):
229
  display_df['Drug_Like'] = analysis_df['Drug_Like'].apply(lambda x: 'βœ… Yes' if x else '❌ No')
230
 
231
  log = "βœ… Assessed drug-likeness using Lipinski's Rule of Five.\n"
 
232
  return analysis_df, display_df, log
233
 
 
234
  def plot_properties_dashboard(df: pd.DataFrame):
235
- """Creates a 2x2 dashboard of molecular property visualizations using Bokeh."""
 
236
  if df.empty or 'Drug_Like' not in df.columns:
237
  return None, "Cannot plot: No analysis data or 'Drug_Like' column missing."
238
 
@@ -251,56 +260,89 @@ def plot_properties_dashboard(df: pd.DataFrame):
251
  ])
252
 
253
  plot_config = {
254
- 'sizing_mode': 'scale_width', 'aspect_ratio': 1, 'background_fill_color': None,
255
- 'border_fill_color': None, 'outline_line_color': '#333333', 'min_border': 50
 
 
256
  }
257
 
258
  def style_plot(p, x_label, y_label, title):
259
- p.title.text, p.title.text_color, p.title.text_font_size, p.title.text_font_style = title, '#FFFFFF', '14pt', 'bold'
260
- p.xaxis.axis_label, p.yaxis.axis_label, p.axis.axis_label_text_color = x_label, y_label, '#CCCCCC'
261
- p.axis.axis_label_text_font_size, p.axis.major_label_text_color = '11pt', '#AAAAAA'
262
- p.grid.grid_line_color, p.grid.grid_line_alpha = '#2A2A2A', 0.3
 
 
 
 
 
 
 
 
 
 
 
 
263
  if p.legend:
264
- p.legend.location, p.legend.background_fill_color, p.legend.border_line_color = "top_right", '#1A1A1A', '#444444'
265
- p.legend.label_text_color, p.legend.click_policy = '#FFFFFF', "mute"
 
 
 
 
266
  return p
267
 
268
- p1 = figure(tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
269
- p1.scatter('MW', 'LogP', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8)
270
- p1.line([500, 500], [df['LogP'].min()-0.5, df['LogP'].max()+0.5], line_dash="dashed", line_color="#FFD700", line_width=2)
271
- p1.line([df['MW'].min()-50, df['MW'].max()+50], [5, 5], line_dash="dashed", line_color="#FFD700", line_width=2)
 
272
  style_plot(p1, "Molecular Weight (Da)", "LogP", "Lipinski Rule: MW vs LogP")
273
 
274
- p2 = figure(tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
275
- p2.scatter('HBD', 'HBA', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8)
276
- p2.line([5, 5], [df['HBA'].min()-1, df['HBA'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2)
277
- p2.line([df['HBD'].min()-1, df['HBD'].max()+1], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2)
278
  style_plot(p2, "Hydrogen Bond Donors", "Hydrogen Bond Acceptors", "Lipinski Rule: Hydrogen Bonding")
279
 
280
- p3 = figure(tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
281
- p3.scatter('TPSA', 'RotBonds', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8)
282
- p3.line([140, 140], [df['RotBonds'].min()-1, df['RotBonds'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2)
283
- p3.line([df['TPSA'].min()-10, df['TPSA'].max()+10], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2)
284
  style_plot(p3, "Topological Polar Surface Area (Å²)", "Rotatable Bonds", "Drug Permeability Indicators")
285
 
286
  p4_config = plot_config.copy()
287
- p4_config['tools'], p4_config.update({'x_range': (-1.0, 1.0), 'y_range': (-1.0, 1.0)}) = "hover", {}
 
288
  p4 = figure(title="Drug-Likeness Distribution", **p4_config)
289
 
 
290
  counts = df['Category'].value_counts()
291
- data = pd.DataFrame(counts).reset_index()
292
- data.columns = ['category', 'value']
293
  data['angle'] = data['value']/data['value'].sum() * 2*pi
294
- data['color'] = [colors[0] if cat == 'Drug-Like' else colors[1] for cat in data['category']]
295
  data['percentage'] = (data['value'] / data['value'].sum() * 100).round(1)
296
- drug_like_percentage = (df['Drug_Like'].sum() / len(df) * 100) if len(df) > 0 else 0
 
 
 
 
297
 
298
  wedge_renderer = p4.annular_wedge(x=0, y=0, inner_radius=0.25, outer_radius=0.45,
299
  start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
300
- line_color="white", fill_color='color', legend_field='category', source=data)
 
 
 
 
 
 
 
 
 
301
 
302
- p4.add_tools(HoverTool(tooltips=[("Category", "@category"), ("Count", "@value"), ("Percentage", "@percentage{%0.1f}%%")], renderers=[wedge_renderer]))
303
- p4.text(0, 0, text=[f"{len(df)}\nCompounds\n({drug_like_percentage:.1f}% Drug-Like)"],
304
  text_align="center", text_baseline="middle", text_color="white", text_font_size="10pt", text_font_style="bold")
305
 
306
  style_plot(p4, "", "", "Compound Classification")
@@ -312,18 +354,32 @@ def plot_properties_dashboard(df: pd.DataFrame):
312
 
313
  # ===== Phase 2 Functions =====
314
  def get_phase2_molecules():
 
 
 
 
315
  return {
316
- 'Paracetamol (Analgesic)': 'CC(=O)Nc1ccc(O)cc1', 'Ibuprofen (Pain/Inflammation)': 'CC(C)Cc1ccc(C(C)C(=O)O)cc1',
317
- 'Aspirin (Pain/Antiplatelet)': 'CC(=O)Oc1ccccc1C(=O)O', 'Naproxen (Pain/Inflammation)': 'C[C@H](C(=O)O)c1ccc2cc(OC)ccc2c1',
318
- 'Diazepam (Anxiolytic)': 'CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc12', 'Metformin (Diabetes)': 'CN(C)C(=N)N=C(N)N',
319
- 'Loratadine (Antihistamine)': 'CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1', 'Morphine (Opioid Analgesic)': 'C[N@]1CC[C@]23c4c5ccc(O)c4O[C@H]2[C@@H](O)C=C[C@H]3[C@H]1C5',
320
- 'Cetirizine (Antihistamine)': 'O=C(O)COCCOc1ccc(cc1)C(c1ccccc1)N1CCN(CC1)CCO', 'Fluoxetine (Antidepressant)': 'CNCCC(c1ccccc1)Oc1ccc(C(F)(F)F)cc1',
321
- 'Amoxicillin (Antibiotic)': 'C[C@@]1([C@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](N)c3ccc(O)cc3)C(=O)O)C', 'Atorvastatin (Cholesterol)': 'CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1',
322
- 'Ciprofloxacin (Antibiotic)': 'O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc12', 'Warfarin (Anticoagulant)': 'O=C(c1ccccc1)C(c1oc2ccccc2c1=O)C',
323
- 'Furosemide (Diuretic)': 'O=C(O)c1cc(Cl)c(NC2CO2)c(c1)S(=O)(=O)N', 'Sildenafil (Erectile Dysfunction)': 'CCCC1=NN(C)C(=NC1=O)c1cc(N2CCN(C)CC2)c(OC)cc1S(=O)(=O)C',
324
- 'Omeprazole (GERD)': 'COc1ccc(C)c(c1NC(=O)c1cn(Cc2ccc(OC)cc2)cn1)OC', 'Losartan (Hypertension)': 'Cc1cnc(n1C)c1ccc(cc1)-c1ccccc1COC(=O)c1ccccc1',
 
 
 
 
 
 
 
 
 
325
  }
326
 
 
327
  def simulate_virtual_screening(smiles_dict: dict):
328
  np.random.seed(42)
329
  scores = np.random.uniform(2.0, 9.8, len(smiles_dict))
@@ -348,25 +404,83 @@ def predict_admet_properties(smiles_dict: dict):
348
  log += f"βœ… Predicted ADMET properties for {len(df)} molecules.\n"
349
  return df, log
350
 
351
- # REFACTORED: This function now returns a 2D image (as bytes) and a 3D viewer object
352
- @st.cache_data
353
- def generate_molecule_visuals(smiles: str):
354
- """Generates a 2D image and a 3D py3Dmol viewer object for a single molecule."""
355
  try:
356
  mol = Chem.MolFromSmiles(smiles)
357
- if not mol: return None, None, "Invalid SMILES"
358
-
359
- # 2D SVG Image
360
  drawer = Draw.rdMolDraw2D.MolDraw2DSVG(400, 300)
 
361
  drawer.drawOptions().clearBackground = False
362
- drawer.drawOptions().backgroundColour = (0.11, 0.11, 0.11)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  drawer.DrawMolecule(mol)
364
  drawer.FinishDrawing()
365
  svg_2d = drawer.GetDrawingText().replace('svg:', '')
366
- # Simple color replacement for dark theme
367
- svg_2d = svg_2d.replace('black', 'white')
368
 
369
- # 3D Viewer Object
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  mol_3d = Chem.AddHs(mol)
371
  AllChem.EmbedMolecule(mol_3d, randomSeed=42)
372
  AllChem.MMFFOptimizeMolecule(mol_3d)
@@ -377,48 +491,75 @@ def generate_molecule_visuals(smiles: str):
377
  viewer.addModel(sdf_data, "sdf")
378
  viewer.setStyle({'stick': {}, 'sphere': {'scale': 0.25}})
379
  viewer.zoomTo()
380
-
381
- log = f"βœ… Generated 2D/3D views.\n"
382
- return svg_2d, viewer, log
 
 
 
 
 
 
 
 
 
 
 
 
 
383
  except Exception as e:
384
- return None, None, f"❌ Error visualizing molecule: {e}"
385
 
386
- # REFACTORED: This function now returns a py3Dmol viewer object, not HTML
387
  def visualize_protein_ligand_interaction(pdb_data: str, pdb_id: str, ligand_resn: str):
388
  """
389
  Generates a protein-ligand interaction visualization using py3Dmol.
390
  """
391
  if not pdb_data:
392
  return None, "Cannot generate interaction view: No PDB data provided."
 
393
  try:
394
- viewer = py3Dmol.view(width=700, height=650)
395
  viewer.setBackgroundColor('#1C1C1C')
 
 
396
  viewer.addModel(pdb_data, "pdb")
 
 
397
  viewer.setStyle({'cartoon': {'color': 'lightblue', 'opacity': 0.8}})
 
 
398
  if ligand_resn:
399
  viewer.addStyle({'resn': ligand_resn}, {'stick': {'colorscheme': 'greenCarbon', 'radius': 0.2}})
400
  viewer.addStyle({'resn': ligand_resn}, {'sphere': {'scale': 0.3, 'colorscheme': 'greenCarbon'}})
401
- viewer.addSurface(py3Dmol.VDW, {'opacity': 0.2, 'color': 'white'}, {'resn': ligand_resn})
402
- viewer.zoomTo({'resn': ligand_resn})
403
- else:
404
- viewer.zoomTo()
405
 
 
 
 
 
 
 
406
  log = f"βœ… Generated protein-ligand interaction view for {pdb_id} with ligand {ligand_resn}."
407
- return viewer, log
 
408
  except Exception as e:
409
  return None, f"❌ Interaction visualization error: {e}"
410
 
411
  # ===== Phase 3 Functions =====
412
  def get_phase3_molecules():
 
 
 
 
413
  return {
414
  'Oseltamivir (Influenza)': 'CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C',
415
  'Aspirin (Pain/Antiplatelet)': 'CC(=O)OC1=CC=CC=C1C(=O)O',
416
  'Remdesivir (Antiviral)': 'CCC(CC)COC(=O)[C@@H](C)N[P@](=O)(OC[C@@H]1O[C@](C#N)([C@H]([C@@H]1O)O)C2=CC=C3N2N=CN=C3N)OC4=CC=CC=C4',
417
  'Penicillin G (Antibiotic)': 'CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C',
418
  "Imatinib (Gleevec - Cancer)": "Cc1ccc(NC(=O)c2cnc(C)s2)cc1-c1cnc(Nc2ccc(CN)cc2)nc1",
419
- "Sorafenib (Kinase Inhibitor)": "Clc1cccc(Cl)c1OC(=O)Nc1ccc(nc1)NC(=O)C(C)(C)C",
 
420
  "Venetoclax (BCL-2 Inhibitor)": "CC1(CCC(=C(C1)C2=CC=C(C=C2)Cl)CN3CCN(CC3)C4=CC(=C(C=C4)C(=O)NS(=O)(=O)C5=CC(=C(C=C5)NCC6CCOCC6)[N+](=O)[O-])OC7=CN=C8C(=C7)C=CN8)C",
421
- "Dasatinib (Kinase Inhibitor)": "CC1=NC(=NC=C1SC2=NC=C(C=N2)C(=O)NC3=CC=CC(=C3)N)C(=O)O",
422
  }
423
 
424
  def calculate_comprehensive_properties(smiles_dict: dict):
@@ -463,48 +604,90 @@ def predict_toxicity(properties_df: pd.DataFrame):
463
  # ===== Phase 4 Functions =====
464
  def get_regulatory_summary():
465
  summary = {'Component': ['Data Governance', 'Model Architecture', 'Model Validation', 'Interpretability'],
466
- 'Description': ['Data sourced from ChEMBL, PDB, GISAID.',
467
- 'GCN (Target ID), Random Forest (ADMET), K-Means (Patient Stratification).',
468
  'ADMET Model validated with AUC-ROC > 0.85 on an independent test set.',
469
  'SHAP used for patient stratification model outputs.']}
470
  return pd.DataFrame(summary), "βœ… Generated AI/ML documentation summary."
471
 
472
  def simulate_rwd_analysis(adverse_event_text):
 
 
 
473
  np.random.seed(42)
474
  base_events = list(np.random.choice(
475
- ['headache', 'nausea', 'fatigue', 'dizziness', 'rash', 'fever', 'diarrhea'], 100,
476
- p=[0.2, 0.15, 0.12, 0.12, 0.1, 0.08, 0.23]
 
477
  ))
 
478
  user_terms = [word.lower() for word in re.findall(r'\b[a-zA-Z]{3,}\b', adverse_event_text)]
 
479
  all_events = base_events + user_terms
480
- event_counts = pd.Series(all_events).value_counts().nlargest(10)
 
 
 
481
  results_df = event_counts.reset_index()
482
  results_df.columns = ['Adverse_Event', 'Frequency']
483
- log = f"βœ… Analyzed {len(all_events)} total event reports.\n"
 
484
 
 
485
  source = ColumnDataSource(results_df)
486
- p = figure(y_range=results_df['Adverse_Event'].tolist()[::-1], height=450, title="Top 10 Reported Adverse Events",
487
- sizing_mode='stretch_width', tools="pan,wheel_zoom,box_zoom,reset,save")
488
- p.add_tools(HoverTool(tooltips=[("Event", "@Adverse_Event"),("Frequency", "@Frequency")]))
489
- p.hbar(y='Adverse_Event', right='Frequency', source=source, height=0.7, color='#00A0FF', line_color='white')
 
 
 
 
 
 
 
 
 
 
 
 
490
  p.background_fill_color = "#1C1C1C"
491
  p.border_fill_color = "#1C1C1C"
 
492
  p.title.text_color = "white"
493
- p.axis.axis_label_text_color, p.axis.major_label_text_color = "#CCCCCC", "#AAAAAA"
494
- p.grid.grid_line_alpha, p.grid.grid_line_color, p.x_range.start = 0.3, "#444444", 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495
  return results_df, p, log
496
 
497
  def get_ethical_framework():
498
  framework = {'Principle': ['Beneficence', 'Non-maleficence', 'Fairness', 'Transparency'],
499
  'Implementation Strategy': [
500
  'AI models prioritize patient outcomes and clinical efficacy.',
501
- 'Toxicity prediction models aim to minimize patient harm.',
502
- 'Algorithms are audited for demographic bias.',
503
  'Model cards and SHAP values are provided for key decision-making processes.'
504
  ]}
505
  return pd.DataFrame(framework), "βœ… Generated Ethical AI Framework summary."
506
 
507
  # --- 3. Streamlit UI Layout ---
 
 
 
508
  if 'log_p1' not in st.session_state: st.session_state.log_p1 = "Status logs will appear here."
509
  if 'log_p2' not in st.session_state: st.session_state.log_p2 = "Status logs will appear here."
510
  if 'log_p3' not in st.session_state: st.session_state.log_p3 = "Status logs will appear here."
@@ -514,179 +697,307 @@ if 'results_p2' not in st.session_state: st.session_state.results_p2 = {}
514
  if 'results_p3' not in st.session_state: st.session_state.results_p3 = {}
515
  if 'results_p4' not in st.session_state: st.session_state.results_p4 = {}
516
 
 
517
  st.title("🔬 AI-Powered Drug Discovery Pipeline")
518
  st.markdown("An integrated application demonstrating a four-phase computational drug discovery workflow.")
519
 
 
520
  tab1, tab2, tab3, tab4 = st.tabs([
521
- "**Phase 1:** Target Identification", "**Phase 2:** Hit Discovery & ADMET",
522
- "**Phase 3:** Lead Optimization", "**Phase 4:** Pre-clinical & RWE"
 
 
523
  ])
524
 
525
  # --- Phase 1: Target Identification ---
526
  with tab1:
527
  st.header("Phase 1: Target Identification & Initial Analysis")
528
- pdb_options = {"Neuraminidase (2HU4)": "2HU4", "KRAS G12D (7XKJ)": "7XKJ", "SARS-CoV-2 Mpro (8HUR)": "8HUR", "EGFR Kinase (1M17)": "1M17"}
529
- protein_options = {"Neuraminidase (P03468)": "P03468", "KRAS (P01116)": "P01116", "SARS-CoV-2 Mpro (P0DTD1)": "P0DTD1", "EGFR (P00533)": "P00533"}
 
 
 
 
530
 
 
 
 
 
 
 
 
531
  selected_pdb_name = st.selectbox("Select PDB ID:", options=list(pdb_options.keys()), index=0)
532
  pdb_id_input = pdb_options[selected_pdb_name]
 
 
 
 
 
 
 
 
533
  selected_protein_name = st.selectbox("Select NCBI Protein ID:", options=list(protein_options.keys()), index=0)
534
  protein_id_input = protein_options[selected_protein_name]
535
 
536
  st.markdown("---")
 
537
  sample_molecules = create_sample_molecules()
538
  selected_molecules = st.multiselect(
539
- "Select from known drugs:", options=list(sample_molecules.keys()),
540
- default=["Oseltamivir (Influenza)", "Aspirin (Pain/Inflammation)", "Imatinib (Gleevec - Cancer)"]
 
541
  )
542
 
543
  if st.button("πŸš€ Run Phase 1 Analysis", key="run_p1"):
544
- with st.spinner("Running Phase 1..."):
545
  full_log = "--- Phase 1 Analysis Started ---\n"
 
546
  pdb_data, log_pdb = fetch_pdb_structure(pdb_id_input)
547
  full_log += log_pdb
548
- full_log += fetch_fasta_sequence(protein_id_input)
 
 
549
  smiles_to_analyze = {name: sample_molecules[name] for name in selected_molecules}
550
  properties_df, log_props = calculate_molecular_properties(smiles_to_analyze)
551
  full_log += log_props
 
552
  analysis_df, display_df, log_likeness = assess_drug_likeness(properties_df)
553
  full_log += log_likeness
554
- protein_viewer, log_3d = visualize_protein_3d(pdb_data)
 
555
  full_log += log_3d
 
556
  dashboard_plot, log_dash = plot_properties_dashboard(analysis_df)
557
  full_log += log_dash
558
- st.session_state.log_p1 = full_log + "\n--- Phase 1 Analysis Complete ---"
559
- st.session_state.results_p1 = {'protein_viewer': protein_viewer, 'properties_df': display_df, 'dashboard': dashboard_plot}
 
 
 
 
 
 
 
 
560
 
561
- st.text_area("Status & Logs", st.session_state.log_p1, height=200)
562
 
563
- if st.session_state.results_p1:
 
 
 
564
  res1 = st.session_state.results_p1
565
  p1_tabs = st.tabs(["Protein Structure", "Compound Properties Dashboard"])
 
566
  with p1_tabs[0]:
567
  st.subheader(f"3D Structure for PDB ID: {pdb_id_input}")
568
- if res1.get('protein_viewer'):
569
- showmol(res1['protein_viewer'], height=600, width=700)
 
 
 
570
  with p1_tabs[1]:
571
  st.subheader("Physicochemical Properties Analysis")
 
572
  st.dataframe(res1.get('properties_df', pd.DataFrame()), use_container_width=True, hide_index=True)
573
  if res1.get('dashboard'):
574
  st.bokeh_chart(res1['dashboard'], use_container_width=True)
575
 
 
576
  # --- Phase 2: Hit Discovery & ADMET ---
577
  with tab2:
578
  st.header("Phase 2: Virtual Screening & Early ADMET")
 
 
 
 
 
 
 
 
579
  p2_molecules = get_phase2_molecules()
580
  st.info(f"A library of {len(p2_molecules)} compounds is ready for screening.")
581
 
 
582
  interaction_pdb_options = {
583
- "Neuraminidase + Oseltamivir (2HU4)": {"pdb": "2HU4", "ligand": "G39"}, "KRAS G12C + MRTX-1133 (7XKJ)": {"pdb": "7XKJ", "ligand": "M13"},
584
- "SARS-CoV-2 Mpro + Ensitrelvir (8HUR)": {"pdb": "8HUR", "ligand": "X77"}, "EGFR + Erlotinib (1M17)": {"pdb": "1M17", "ligand": "ERL"},
 
 
585
  }
586
- selected_interaction_pdb_name = st.selectbox("Select PDB ID for Interaction:", options=list(interaction_pdb_options.keys()), index=0)
 
 
 
 
587
  p2_pdb_id = interaction_pdb_options[selected_interaction_pdb_name]["pdb"]
588
  p2_ligand_resn = interaction_pdb_options[selected_interaction_pdb_name]["ligand"]
589
 
 
 
 
590
  if st.button("πŸš€ Run Phase 2 Analysis", key="run_p2"):
591
- with st.spinner("Running Phase 2..."):
592
  full_log = "--- Phase 2 Analysis Started ---\n"
 
593
  screening_df, log_screen = simulate_virtual_screening(p2_molecules)
594
  full_log += log_screen
595
  admet_df, log_admet = predict_admet_properties(p2_molecules)
596
  full_log += log_admet
 
597
  merged_df = pd.merge(screening_df, admet_df, on="Molecule")
 
598
  pdb_data, log_pdb_p2 = fetch_pdb_structure(p2_pdb_id)
599
  full_log += log_pdb_p2
600
- interaction_viewer, log_interact = visualize_protein_ligand_interaction(pdb_data, p2_pdb_id, p2_ligand_resn)
 
601
  full_log += log_interact
602
- st.session_state.log_p2 = full_log + "\n--- Phase 2 Analysis Complete ---"
603
- st.session_state.results_p2 = {'merged_df': merged_df, 'interaction_viewer': interaction_viewer}
604
 
605
- st.text_area("Status & Logs", st.session_state.log_p2, height=200)
 
 
 
 
 
 
 
606
 
607
- if st.session_state.results_p2:
 
 
 
608
  res2 = st.session_state.results_p2
609
  p2_tabs = st.tabs(["Screening & ADMET Results", "Protein-Ligand Interaction"])
 
610
  with p2_tabs[0]:
 
611
  st.dataframe(res2.get('merged_df', pd.DataFrame()), use_container_width=True, hide_index=True)
 
612
  with p2_tabs[1]:
613
- st.subheader(f"Interaction for PDB {p2_pdb_id} with Ligand {p2_ligand_resn}")
614
- if res2.get('interaction_viewer'):
615
- showmol(res2['interaction_viewer'], height=650, width=700)
616
-
 
 
617
  # --- Phase 3: Lead Optimization ---
618
  with tab3:
619
  st.header("Phase 3: Lead Compound Optimization")
 
 
 
 
 
 
 
 
620
  p3_molecules = get_phase3_molecules()
621
  selected_leads = st.multiselect(
622
- "Select lead compounds to optimize:", options=list(p3_molecules.keys()),
623
- default=['Oseltamivir (Influenza)', 'Remdesivir (Antiviral)', 'Imatinib (Gleevec - Cancer)']
 
624
  )
625
 
626
  if st.button("πŸš€ Run Phase 3 Analysis", key="run_p3"):
627
- with st.spinner("Running Phase 3..."):
628
  full_log = "--- Phase 3 Analysis Started ---\n"
 
629
  smiles_to_analyze_p3 = {name: p3_molecules[name] for name in selected_leads}
 
630
  comp_props_df, log_comp = calculate_comprehensive_properties(smiles_to_analyze_p3)
631
  full_log += log_comp
 
632
  toxicity_df, log_tox = predict_toxicity(comp_props_df)
633
  full_log += log_tox
 
634
  final_df = pd.merge(comp_props_df, toxicity_df, on="Compound")
635
- visuals = {name: generate_molecule_visuals(smiles) for name, smiles in smiles_to_analyze_p3.items()}
636
- st.session_state.log_p3 = full_log + "\n--- Phase 3 Analysis Complete ---"
637
- st.session_state.results_p3 = {'final_df': final_df, 'visuals': visuals}
 
 
 
 
 
 
 
 
 
 
638
 
639
- st.text_area("Status & Logs", st.session_state.log_p3, height=200)
640
 
641
- if st.session_state.results_p3:
 
 
 
 
642
  res3 = st.session_state.results_p3
643
  st.subheader("Lead Compound Analysis & Toxicity Prediction")
644
  st.dataframe(res3.get('final_df', pd.DataFrame()), use_container_width=True, hide_index=True)
645
 
646
  st.subheader("2D & 3D Molecular Structures")
647
- for name, (svg_2d, viewer_3d, log) in res3.get('visuals', {}).items():
648
- st.markdown(f"#### {name}")
649
- col1, col2 = st.columns(2)
650
- with col1:
651
- st.markdown("##### 2D Structure")
652
- if svg_2d:
653
- st.image(svg_2d)
654
- with col2:
655
- st.markdown("##### 3D Structure")
656
- if viewer_3d:
657
- showmol(viewer_3d, height=300, width=400)
658
 
659
  # --- Phase 4: Pre-clinical & RWE ---
660
  with tab4:
661
  st.header("Phase 4: Simulated Pre-clinical & Real-World Evidence (RWE)")
 
 
 
 
 
 
 
662
  rwd_input = st.text_area(
663
  "Enter simulated adverse event report text:",
664
- "Patient reports include instances of headache, severe nausea, and occasional skin rash.", height=150
 
665
  )
666
 
667
  if st.button("πŸš€ Run Phase 4 Analysis", key="run_p4"):
668
- with st.spinner("Running Phase 4..."):
669
  full_log = "--- Phase 4 Analysis Started ---\n"
 
670
  reg_df, log_reg = get_regulatory_summary()
671
  full_log += log_reg
 
672
  eth_df, log_eth = get_ethical_framework()
673
  full_log += log_eth
 
674
  rwd_df, plot_bar, log_rwd = simulate_rwd_analysis(rwd_input)
675
  full_log += log_rwd
676
- st.session_state.log_p4 = full_log + "\n--- Phase 4 Analysis Complete ---"
677
- st.session_state.results_p4 = {'rwd_df': rwd_df, 'plot_bar': plot_bar, 'reg_df': reg_df, 'eth_df': eth_df}
 
 
 
 
 
 
 
678
 
679
- st.text_area("Status & Logs", st.session_state.log_p4, height=200)
680
 
681
- if st.session_state.results_p4:
 
 
 
682
  res4 = st.session_state.results_p4
683
  p4_tabs = st.tabs(["Pharmacovigilance Analysis", "Regulatory & Ethical Frameworks"])
 
684
  with p4_tabs[0]:
 
685
  if res4.get('plot_bar'):
686
  st.bokeh_chart(res4['plot_bar'], use_container_width=True)
687
  st.dataframe(res4.get('rwd_df', pd.DataFrame()), use_container_width=True, hide_index=True)
 
688
  with p4_tabs[1]:
689
  st.subheader("AI/ML Model Regulatory Summary")
690
  st.dataframe(res4.get('reg_df', pd.DataFrame()), use_container_width=True, hide_index=True)
 
691
  st.subheader("Ethical AI Framework")
692
- st.dataframe(res4.get('eth_df', pd.DataFrame()), use_container_width=True, hide_index=True)
 
 
20
 
21
  # 3D Visualization
22
  import py3Dmol
 
23
 
24
  # Bokeh plotting
25
  from bokeh.plotting import figure
 
149
  log += f"❌ An error occurred while fetching FASTA data: {e}\n"
150
  return log
151
 
152
+ def visualize_protein_3d(pdb_data: str, title="Protein 3D Structure"):
 
153
  """
154
  Generates an interactive 3D protein visualization using py3Dmol.
155
  """
156
  if not pdb_data:
157
  return None, "Cannot generate 3D view: No PDB data provided."
158
  try:
159
+ viewer = py3Dmol.view(width='100%', height=600)
160
  viewer.setBackgroundColor('#1C1C1C')
161
  viewer.addModel(pdb_data, "pdb")
162
  viewer.setStyle({'cartoon': {'color': 'spectrum', 'thickness': 0.8}})
163
  viewer.addSurface(py3Dmol.VDW, {'opacity': 0.3, 'color': 'lightblue'})
164
  viewer.zoomTo()
165
+ html = viewer._make_html()
166
+ log = f"βœ… Generated 3D visualization for {title}."
167
+ return html, log
168
  except Exception as e:
169
  return None, f"❌ 3D visualization error: {e}"
170
 
171
  def create_sample_molecules():
172
  """
173
  Returns a dictionary of sample molecules in Name:SMILES format.
174
+ Expanded list for more comprehensive demonstration.
175
  """
176
  return {
177
  "Oseltamivir (Influenza)": "CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C",
 
181
  "Atorvastatin (Cholesterol)": "CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1", # Lipitor
182
  "Metformin (Diabetes)": "CN(C)C(=N)N=C(N)N",
183
  "Loratadine (Antihistamine)": "CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1",
184
+ "Imatinib (Gleevec - Cancer)": "Cc1ccc(NC(=O)c2cnc(C)s2)cc1-c1cnc(Nc2ccc(CN)cc2)nc1", # Complex structure, tyrosine kinase inhibitor
185
+ "Amlodipine (Hypertension)": "CCC(COC(=O)c1cnc(C)c(c1C)C(=O)OC)c1ccc(Cl)cc1", # Calcium channel blocker
186
+ "Rosuvastatin (Cholesterol)": "CC(C)c1ccc(cc1)S(=O)(=O)Nc1ncc(C)c(C(=O)O[C@H](C)[C@H](O)CC(=O)O)c1C", # Statin
187
  }
188
 
189
  def calculate_molecular_properties(smiles_dict: dict):
 
196
  mol = Chem.MolFromSmiles(smiles)
197
  if mol:
198
  props = {
199
+ 'Molecule': name,
200
+ 'SMILES': smiles,
201
+ 'MW': Descriptors.MolWt(mol),
202
+ 'LogP': Descriptors.MolLogP(mol),
203
+ 'HBD': Descriptors.NumHDonors(mol),
204
+ 'HBA': Descriptors.NumHAcceptors(mol),
205
+ 'TPSA': Descriptors.TPSA(mol),
206
  'RotBonds': Descriptors.NumRotatableBonds(mol),
207
  }
208
  properties.append(props)
 
216
  def assess_drug_likeness(df: pd.DataFrame):
217
  """
218
  Assesses drug-likeness based on Lipinski's Rule of Five.
219
+ This version returns a boolean for plotting and a formatted string for display.
220
  """
221
  if df.empty:
222
  return pd.DataFrame(), pd.DataFrame(), "Cannot assess drug-likeness: No properties data."
 
227
  analysis_df['HBD_OK'] = analysis_df['HBD'] <= 5
228
  analysis_df['HBA_OK'] = analysis_df['HBA'] <= 10
229
  analysis_df['Lipinski_Violations'] = (~analysis_df[['MW_OK', 'LogP_OK', 'HBD_OK', 'HBA_OK']]).sum(axis=1)
230
+
231
  analysis_df['Drug_Like'] = analysis_df['Lipinski_Violations'] <= 1
232
 
233
  display_df = df.copy()
 
235
  display_df['Drug_Like'] = analysis_df['Drug_Like'].apply(lambda x: 'βœ… Yes' if x else '❌ No')
236
 
237
  log = "βœ… Assessed drug-likeness using Lipinski's Rule of Five.\n"
238
+
239
  return analysis_df, display_df, log
240
 
241
+
242
  def plot_properties_dashboard(df: pd.DataFrame):
243
+ """Creates a professional 2x2 dashboard of molecular property visualizations using Bokeh."""
244
+ from math import pi, cos, sin
245
  if df.empty or 'Drug_Like' not in df.columns:
246
  return None, "Cannot plot: No analysis data or 'Drug_Like' column missing."
247
 
 
260
  ])
261
 
262
  plot_config = {
263
+ 'sizing_mode': 'scale_width', 'aspect_ratio': 1,
264
+ 'background_fill_color': None, 'border_fill_color': None,
265
+ 'outline_line_color': '#333333', 'min_border_left': 50,
266
+ 'min_border_right': 50, 'min_border_top': 50, 'min_border_bottom': 50
267
  }
268
 
269
  def style_plot(p, x_label, y_label, title):
270
+ """Apply consistent professional styling to plots."""
271
+ p.title.text = title
272
+ p.title.text_color = '#FFFFFF'
273
+ p.title.text_font_size = '14pt'
274
+ p.title.text_font_style = 'bold'
275
+
276
+ p.xaxis.axis_label = x_label
277
+ p.yaxis.axis_label = y_label
278
+ p.axis.axis_label_text_color = '#CCCCCC'
279
+ p.axis.axis_label_text_font_size = '11pt'
280
+ p.axis.major_label_text_color = '#AAAAAA'
281
+ p.axis.major_label_text_font_size = '10pt'
282
+
283
+ p.grid.grid_line_color = '#2A2A2A'
284
+ p.grid.grid_line_alpha = 0.3
285
+
286
  if p.legend:
287
+ p.legend.location = "top_right"
288
+ p.legend.background_fill_color = '#1A1A1A'
289
+ p.legend.background_fill_alpha = 0.8
290
+ p.legend.border_line_color = '#444444'
291
+ p.legend.label_text_color = '#FFFFFF'
292
+ p.legend.click_policy = "mute"
293
  return p
294
 
295
+ p1 = figure(title="Molecular Weight vs LogP", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
296
+ p1.scatter('MW', 'LogP', source=source, legend_group='Category',
297
+ color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
298
+ p1.line([500, 500], [df['LogP'].min()-0.5, df['LogP'].max()+0.5], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="MW ≀ 500")
299
+ p1.line([df['MW'].min()-50, df['MW'].max()+50], [5, 5], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="LogP ≀ 5")
300
  style_plot(p1, "Molecular Weight (Da)", "LogP", "Lipinski Rule: MW vs LogP")
301
 
302
+ p2 = figure(title="Hydrogen Bonding Profile", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
303
+ p2.scatter('HBD', 'HBA', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
304
+ p2.line([5, 5], [df['HBA'].min()-1, df['HBA'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="HBD ≀ 5")
305
+ p2.line([df['HBD'].min()-1, df['HBD'].max()+1], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="HBA ≀ 10")
306
  style_plot(p2, "Hydrogen Bond Donors", "Hydrogen Bond Acceptors", "Lipinski Rule: Hydrogen Bonding")
307
 
308
+ p3 = figure(title="Molecular Flexibility & Polarity", tools=[scatter_hover, 'pan,wheel_zoom,box_zoom,reset,save'], **plot_config)
309
+ p3.scatter('TPSA', 'RotBonds', source=source, legend_group='Category', color=color_mapper, size=12, alpha=0.8, line_color='white', line_width=0.5)
310
+ p3.line([140, 140], [df['RotBonds'].min()-1, df['RotBonds'].max()+1], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="TPSA ≀ 140")
311
+ p3.line([df['TPSA'].min()-10, df['TPSA'].max()+10], [10, 10], line_dash="dashed", line_color="#FFD700", line_width=2, alpha=0.7, legend_label="RotBonds ≀ 10")
312
  style_plot(p3, "Topological Polar Surface Area (Γ…Β²)", "Rotatable Bonds", "Drug Permeability Indicators")
313
 
314
  p4_config = plot_config.copy()
315
+ p4_config['tools'] = "hover"
316
+ p4_config.update({'x_range': (-1.0, 1.0), 'y_range': (-1.0, 1.0)})
317
  p4 = figure(title="Drug-Likeness Distribution", **p4_config)
318
 
319
+ # Calculate percentages for the doughnut chart
320
  counts = df['Category'].value_counts()
321
+ data = pd.DataFrame({'category': counts.index, 'value': counts.values})
 
322
  data['angle'] = data['value']/data['value'].sum() * 2*pi
323
+ data['color'] = [colors[0] if cat == 'Drug-Like' else colors[1] for cat in counts.index]
324
  data['percentage'] = (data['value'] / data['value'].sum() * 100).round(1)
325
+
326
+ # Calculate overall drug-like percentage for central text
327
+ total_compounds = len(df)
328
+ drug_like_count = df['Drug_Like'].sum()
329
+ drug_like_percentage = (drug_like_count / total_compounds * 100) if total_compounds > 0 else 0
330
 
331
  wedge_renderer = p4.annular_wedge(x=0, y=0, inner_radius=0.25, outer_radius=0.45,
332
  start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
333
+ line_color="white", line_width=3, fill_color='color',
334
+ legend_field='category', source=data)
335
+
336
+ # Updated HoverTool to display percentage
337
+ donut_hover = HoverTool(tooltips=[
338
+ ("Category", "@category"),
339
+ ("Count", "@value"),
340
+ ("Percentage", "@percentage{%0.1f}%%") # Display percentage with one decimal place
341
+ ], renderers=[wedge_renderer])
342
+ p4.add_tools(donut_hover)
343
 
344
+ # Updated central text to show Drug-Like percentage
345
+ p4.text([0], [0], text=[f"{total_compounds}\nCompounds\n({drug_like_percentage:.1f}% Drug-Like)"],
346
  text_align="center", text_baseline="middle", text_color="white", text_font_size="10pt", text_font_style="bold")
347
 
348
  style_plot(p4, "", "", "Compound Classification")
 
354
 
355
  # ===== Phase 2 Functions =====
356
  def get_phase2_molecules():
357
+ """
358
+ Returns an expanded list of common drugs with corrected SMILES for virtual screening.
359
+ These are chosen to be well-known and diverse in their therapeutic areas.
360
+ """
361
  return {
362
+ 'Paracetamol (Analgesic)': 'CC(=O)Nc1ccc(O)cc1',
363
+ 'Ibuprofen (Pain/Inflammation)': 'CC(C)Cc1ccc(C(C)C(=O)O)cc1',
364
+ 'Aspirin (Pain/Antiplatelet)': 'CC(=O)Oc1ccccc1C(=O)O',
365
+ 'Naproxen (Pain/Inflammation)': 'C[C@H](C(=O)O)c1ccc2cc(OC)ccc2c1',
366
+ 'Diazepam (Anxiolytic)': 'CN1C(=O)CN=C(c2ccccc2)c2cc(Cl)ccc12',
367
+ 'Metformin (Diabetes)': 'CN(C)C(=N)N=C(N)N',
368
+ 'Loratadine (Antihistamine)': 'CCOC(=O)N1CCC(C(c2ccc(Cl)cc2)c2ccccn2)CC1',
369
+ 'Morphine (Opioid Analgesic)': 'C[N@]1CC[C@]23c4c5ccc(O)c4O[C@H]2[C@@H](O)C=C[C@H]3[C@H]1C5',
370
+ 'Cetirizine (Antihistamine)': 'O=C(O)COCCOc1ccc(cc1)C(c1ccccc1)N1CCN(CC1)CCO',
371
+ 'Fluoxetine (Antidepressant)': 'CNCCC(c1ccccc1)Oc1ccc(C(F)(F)F)cc1',
372
+ 'Amoxicillin (Antibiotic)': 'C[C@@]1([C@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)[C@@H](N)c3ccc(O)cc3)C(=O)O)C',
373
+ 'Atorvastatin (Cholesterol)': 'CC(C)c1c(C(=O)Nc2ccccc2)c(-c2ccccc2)c(c1)c1ccc(F)cc1',
374
+ 'Ciprofloxacin (Antibiotic)': 'O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc12',
375
+ 'Warfarin (Anticoagulant)': 'O=C(c1ccccc1)C(c1oc2ccccc2c1=O)C',
376
+ 'Furosemide (Diuretic)': 'O=C(O)c1cc(Cl)c(NC2CO2)c(c1)S(=O)(=O)N',
377
+ 'Sildenafil (Erectile Dysfunction)': 'CCCC1=NN(C)C(=NC1=O)c1cc(N2CCN(C)CC2)c(OC)cc1S(=O)(=O)C',
378
+ 'Omeprazole (GERD)': 'COc1ccc(C)c(c1NC(=O)c1cn(Cc2ccc(OC)cc2)cn1)OC', # Proton pump inhibitor
379
+ 'Losartan (Hypertension)': 'Cc1cnc(n1C)c1ccc(cc1)-c1ccccc1COC(=O)c1ccccc1', # Angiotensin Receptor Blocker
380
  }
381
 
382
+
383
  def simulate_virtual_screening(smiles_dict: dict):
384
  np.random.seed(42)
385
  scores = np.random.uniform(2.0, 9.8, len(smiles_dict))
 
404
  log += f"βœ… Predicted ADMET properties for {len(df)} molecules.\n"
405
  return df, log
406
 
407
+ def visualize_molecule_2d_3d(smiles: str, name: str):
408
+ """Generates a side-by-side 2D SVG and 3D py3Dmol HTML view for a single molecule."""
409
+ log = ""
 
410
  try:
411
  mol = Chem.MolFromSmiles(smiles)
412
+ if not mol: return f"<p>Invalid SMILES for {name}</p>", f"❌ Invalid SMILES for {name}"
413
+
 
414
  drawer = Draw.rdMolDraw2D.MolDraw2DSVG(400, 300)
415
+ # Set dark theme colors for 2D drawing
416
  drawer.drawOptions().clearBackground = False
417
+ drawer.drawOptions().addStereoAnnotation = True
418
+ drawer.drawOptions().baseFontSize = 0.8
419
+ drawer.drawOptions().circleAtoms = False
420
+ drawer.drawOptions().highlightColour = (1, 0.5, 0) # Orange for highlights
421
+
422
+ # Set colors for dark background visibility
423
+ drawer.drawOptions().backgroundColour = (0.11, 0.11, 0.11) # Dark background
424
+ drawer.drawOptions().symbolColour = (1, 1, 1) # White symbols
425
+ drawer.drawOptions().defaultColour = (1, 1, 1) # White default color
426
+
427
+ # Try to set annotation color (this might help with (R)/(S) labels)
428
+ try:
429
+ drawer.drawOptions().annotationColour = (1, 1, 1) # White annotations
430
+ except:
431
+ pass
432
+
433
  drawer.DrawMolecule(mol)
434
  drawer.FinishDrawing()
435
  svg_2d = drawer.GetDrawingText().replace('svg:', '')
 
 
436
 
437
+ # More aggressive SVG text color fixes - target all possible black text variations
438
+
439
+ # First, comprehensive string replacements
440
+ svg_2d = svg_2d.replace('stroke="black"', 'stroke="white"')
441
+ svg_2d = svg_2d.replace('fill="black"', 'fill="white"')
442
+ svg_2d = svg_2d.replace('stroke="#000000"', 'stroke="#FFFFFF"')
443
+ svg_2d = svg_2d.replace('fill="#000000"', 'fill="#FFFFFF"')
444
+ svg_2d = svg_2d.replace('stroke="#000"', 'stroke="#FFF"')
445
+ svg_2d = svg_2d.replace('fill="#000"', 'fill="#FFF"')
446
+ svg_2d = svg_2d.replace('stroke:black', 'stroke:white')
447
+ svg_2d = svg_2d.replace('fill:black', 'fill:white')
448
+ svg_2d = svg_2d.replace('stroke:#000000', 'stroke:#FFFFFF')
449
+ svg_2d = svg_2d.replace('fill:#000000', 'fill:#FFFFFF')
450
+ svg_2d = svg_2d.replace('stroke:#000', 'stroke:#FFF')
451
+ svg_2d = svg_2d.replace('fill:#000', 'fill="#FFF"')
452
+ svg_2d = svg_2d.replace('stroke="rgb(0,0,0)"', 'stroke="rgb(255,255,255)"')
453
+ svg_2d = svg_2d.replace('fill="rgb(0,0,0)"', 'fill="rgb(255,255,255)"')
454
+ svg_2d = svg_2d.replace('stroke:rgb(0,0,0)', 'stroke:rgb(255,255,255)')
455
+ svg_2d = svg_2d.replace('fill:rgb(0,0,0)', 'fill:rgb(255,255,255)')
456
+ svg_2d = svg_2d.replace('color="black"', 'color="white"')
457
+ svg_2d = svg_2d.replace('color:#000000', 'color:#FFFFFF')
458
+ svg_2d = svg_2d.replace('color:#000', 'color:#FFF')
459
+
460
+ # Aggressive regex-based fixes for all text elements
461
+ # Remove any existing fill attributes from text elements and add white fill
462
+ svg_2d = re.sub(r'<text([^>]*?)\s+fill="[^"]*"([^>]*?)>', r'<text\1\2 fill="white">', svg_2d)
463
+ svg_2d = re.sub(r'<text([^>]*?)(?<!fill="white")>', r'<text\1 fill="white">', svg_2d)
464
+
465
+ # Fix style attributes in text elements
466
+ svg_2d = re.sub(r'<text([^>]*?)style="([^"]*?)fill:\s*(?:black|#000000|#000|rgb\(0,0,0\))([^"]*?)"([^>]*?)>',
467
+ r'<text\1style="\2fill:white\3"\4>', svg_2d)
468
+
469
+ # If text elements don't have any fill specified, ensure they get white
470
+ svg_2d = re.sub(r'<text(?![^>]*fill=)([^>]*?)>', r'<text fill="white"\1>', svg_2d)
471
+
472
+ # Clean up any duplicate fill attributes
473
+ svg_2d = re.sub(r'fill="white"\s+fill="white"', 'fill="white"', svg_2d)
474
+
475
+ # Final catch-all: replace any remaining black in the entire SVG
476
+ svg_2d = re.sub(r'\bblack\b', 'white', svg_2d)
477
+ svg_2d = re.sub(r'#000000', '#FFFFFF', svg_2d)
478
+ svg_2d = re.sub(r'#000\b', '#FFF', svg_2d)
479
+ svg_2d = re.sub(r'rgb\(0,\s*0,\s*0\)', 'rgb(255,255,255)', svg_2d)
480
+
481
+ # Embed the SVG within a div with a dark background for consistency
482
+ svg_2d = f'<div style="background-color: #1C1C1C; padding: 10px; border-radius: 5px;">{svg_2d}</div>'
483
+
484
  mol_3d = Chem.AddHs(mol)
485
  AllChem.EmbedMolecule(mol_3d, randomSeed=42)
486
  AllChem.MMFFOptimizeMolecule(mol_3d)
 
491
  viewer.addModel(sdf_data, "sdf")
492
  viewer.setStyle({'stick': {}, 'sphere': {'scale': 0.25}})
493
  viewer.zoomTo()
494
+ html_3d = viewer._make_html()
495
+
496
+ combined_html = f"""
497
+ <div style="display: flex; flex-direction: row; align-items: center; justify-content: space-around; border: 1px solid #444; border-radius: 10px; padding: 10px; margin-bottom: 10px; background-color: #2b2b2b;">
498
+ <div style="text-align: center;">
499
+ <h4 style="color: white; font-family: 'Roboto', sans-serif;">{name} (2D Structure)</h4>
500
+ {svg_2d}
501
+ </div>
502
+ <div style="text-align: center;">
503
+ <h4 style="color: white; font-family: 'Roboto', sans-serif;">{name} (3D Interactive)</h4>
504
+ {html_3d}
505
+ </div>
506
+ </div>
507
+ """
508
+ log += f"βœ… Generated 2D/3D view for {name}.\n"
509
+ return combined_html, log
510
  except Exception as e:
511
+ return f"<p>Error visualizing {name}: {e}</p>", f"❌ Error visualizing {name}: {e}"
512
 
 
513
  def visualize_protein_ligand_interaction(pdb_data: str, pdb_id: str, ligand_resn: str):
514
  """
515
  Generates a protein-ligand interaction visualization using py3Dmol.
516
  """
517
  if not pdb_data:
518
  return None, "Cannot generate interaction view: No PDB data provided."
519
+
520
  try:
521
+ viewer = py3Dmol.view(width='100%', height=650)
522
  viewer.setBackgroundColor('#1C1C1C')
523
+
524
+ # Add the protein structure
525
  viewer.addModel(pdb_data, "pdb")
526
+
527
+ # Style the protein (cartoon representation)
528
  viewer.setStyle({'cartoon': {'color': 'lightblue', 'opacity': 0.8}})
529
+
530
+ # Highlight the ligand if specified
531
  if ligand_resn:
532
  viewer.addStyle({'resn': ligand_resn}, {'stick': {'colorscheme': 'greenCarbon', 'radius': 0.2}})
533
  viewer.addStyle({'resn': ligand_resn}, {'sphere': {'scale': 0.3, 'colorscheme': 'greenCarbon'}})
 
 
 
 
534
 
535
+ # Add surface representation for binding site
536
+ viewer.addSurface(py3Dmol.VDW, {'opacity': 0.2, 'color': 'white'}, {'resn': ligand_resn})
537
+
538
+ viewer.zoomTo({'resn': ligand_resn} if ligand_resn else {})
539
+
540
+ html = viewer._make_html()
541
  log = f"βœ… Generated protein-ligand interaction view for {pdb_id} with ligand {ligand_resn}."
542
+ return html, log
543
+
544
  except Exception as e:
545
  return None, f"❌ Interaction visualization error: {e}"
546
 
547
  # ===== Phase 3 Functions =====
548
  def get_phase3_molecules():
549
+ """
550
+ Returns an expanded list of lead compounds for optimization.
551
+ These are chosen to be representative of active pharmaceutical ingredients or advanced candidates.
552
+ """
553
  return {
554
  'Oseltamivir (Influenza)': 'CCC(CC)O[C@H]1[C@H]([C@@H]([C@H](C=C1C(=O)OCC)N)N)NC(=O)C',
555
  'Aspirin (Pain/Antiplatelet)': 'CC(=O)OC1=CC=CC=C1C(=O)O',
556
  'Remdesivir (Antiviral)': 'CCC(CC)COC(=O)[C@@H](C)N[P@](=O)(OC[C@@H]1O[C@](C#N)([C@H]([C@@H]1O)O)C2=CC=C3N2N=CN=C3N)OC4=CC=CC=C4',
557
  'Penicillin G (Antibiotic)': 'CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C',
558
  "Imatinib (Gleevec - Cancer)": "Cc1ccc(NC(=O)c2cnc(C)s2)cc1-c1cnc(Nc2ccc(CN)cc2)nc1",
559
+ "Sorafenib (Kinase Inhibitor)": "Clc1cccc(Cl)c1OC(=O)Nc1ccc(nc1)NC(=O)C(C)(C)C", # Multi-kinase inhibitor for cancer
560
+ # CORRECTED SMILES for Venetoclax
561
  "Venetoclax (BCL-2 Inhibitor)": "CC1(CCC(=C(C1)C2=CC=C(C=C2)Cl)CN3CCN(CC3)C4=CC(=C(C=C4)C(=O)NS(=O)(=O)C5=CC(=C(C=C5)NCC6CCOCC6)[N+](=O)[O-])OC7=CN=C8C(=C7)C=CN8)C",
562
+ "Dasatinib (Kinase Inhibitor)": "CC1=NC(=NC=C1SC2=NC=C(C=N2)C(=O)NC3=CC=CC(=C3)N)C(=O)O", # Multi-kinase inhibitor for leukemia
563
  }
564
 
565
  def calculate_comprehensive_properties(smiles_dict: dict):
 
604
  # ===== Phase 4 Functions =====
605
  def get_regulatory_summary():
606
  summary = {'Component': ['Data Governance', 'Model Architecture', 'Model Validation', 'Interpretability'],
607
+ 'Description': ['Data sourced from ChEMBL, PDB, GISAID. Bias assessed via geographic distribution analysis.',
608
+ 'Graph Convolutional Network (Target ID), Random Forest (ADMET), K-Means (Patient Stratification).',
609
  'ADMET Model validated with AUC-ROC > 0.85 on an independent test set.',
610
  'SHAP used for patient stratification model outputs.']}
611
  return pd.DataFrame(summary), "βœ… Generated AI/ML documentation summary."
612
 
613
  def simulate_rwd_analysis(adverse_event_text):
614
+ """
615
+ Analyzes simulated adverse event text and generates a DataFrame and Bokeh plot.
616
+ """
617
  np.random.seed(42)
618
  base_events = list(np.random.choice(
619
+ ['headache', 'nausea', 'fatigue', 'dizziness', 'rash', 'fever', 'diarrhea', 'constipation', 'insomnia', 'muscle pain'],
620
+ 100,
621
+ p=[0.2, 0.15, 0.12, 0.12, 0.1, 0.08, 0.08, 0.05, 0.05, 0.05] # Adjusted probabilities for new events
622
  ))
623
+
624
  user_terms = [word.lower() for word in re.findall(r'\b[a-zA-Z]{3,}\b', adverse_event_text)]
625
+
626
  all_events = base_events + user_terms
627
+
628
+ events_df = pd.DataFrame(all_events, columns=['Adverse_Event'])
629
+ event_counts = events_df['Adverse_Event'].value_counts().nlargest(10).sort_values(ascending=False)
630
+
631
  results_df = event_counts.reset_index()
632
  results_df.columns = ['Adverse_Event', 'Frequency']
633
+
634
+ log = f"βœ… Analyzed {len(all_events)} total event reports. Identified {len(event_counts)} unique adverse events for plotting.\n"
635
 
636
+ # Create Bokeh Plot
637
  source = ColumnDataSource(results_df)
638
+ y_range = results_df['Adverse_Event'].tolist()[::-1]
639
+
640
+ hover = HoverTool(tooltips=[("Event", "@Adverse_Event"),("Frequency", "@Frequency")])
641
+
642
+ p = figure(
643
+ y_range=y_range, height=450, title="Top 10 Reported Adverse Events",
644
+ sizing_mode='stretch_width', tools="pan,wheel_zoom,box_zoom,reset,save",
645
+ )
646
+ p.add_tools(hover)
647
+
648
+ p.hbar(
649
+ y='Adverse_Event', right='Frequency', source=source, height=0.7,
650
+ color='#00A0FF', line_color='white', legend_label="Event Frequency"
651
+ )
652
+
653
+ # Style the plot for a dark theme
654
  p.background_fill_color = "#1C1C1C"
655
  p.border_fill_color = "#1C1C1C"
656
+ p.outline_line_color = '#333333'
657
  p.title.text_color = "white"
658
+ p.title.text_font_size = '16pt'
659
+ p.title.align = "center"
660
+ p.xaxis.axis_label = "Frequency Count"
661
+ p.yaxis.axis_label = "Adverse Event"
662
+ p.axis.axis_label_text_color = "#CCCCCC"
663
+ p.axis.axis_label_text_font_size = "12pt"
664
+ p.axis.major_label_text_color = "#AAAAAA"
665
+ p.axis.major_label_text_font_size = "10pt"
666
+ p.grid.grid_line_alpha = 0.3
667
+ p.grid.grid_line_color = "#444444"
668
+ p.x_range.start = 0
669
+ p.legend.location = "top_right"
670
+ p.legend.background_fill_color = "#2A2A2A"
671
+ p.legend.background_fill_alpha = 0.7
672
+ p.legend.border_line_color = "#444444"
673
+ p.legend.label_text_color = "white"
674
+
675
  return results_df, p, log
676
 
677
  def get_ethical_framework():
678
  framework = {'Principle': ['Beneficence', 'Non-maleficence', 'Fairness', 'Transparency'],
679
  'Implementation Strategy': [
680
  'AI models prioritize patient outcomes and clinical efficacy.',
681
+ 'Toxicity prediction and pharmacovigilance models aim to minimize patient harm.',
682
+ 'Algorithms are audited for demographic bias in training data and predictions.',
683
  'Model cards and SHAP values are provided for key decision-making processes.'
684
  ]}
685
  return pd.DataFrame(framework), "βœ… Generated Ethical AI Framework summary."
686
 
687
  # --- 3. Streamlit UI Layout ---
688
+
689
+ # Initialize session state variables
690
+ if 'active_tab' not in st.session_state: st.session_state.active_tab = "Phase 1: Target Identification"
691
  if 'log_p1' not in st.session_state: st.session_state.log_p1 = "Status logs will appear here."
692
  if 'log_p2' not in st.session_state: st.session_state.log_p2 = "Status logs will appear here."
693
  if 'log_p3' not in st.session_state: st.session_state.log_p3 = "Status logs will appear here."
 
697
  if 'results_p3' not in st.session_state: st.session_state.results_p3 = {}
698
  if 'results_p4' not in st.session_state: st.session_state.results_p4 = {}
699
 
700
+ # --- Header ---
701
  st.title("πŸ”¬ AI-Powered Drug Discovery Pipeline")
702
  st.markdown("An integrated application demonstrating a four-phase computational drug discovery workflow.")
703
 
704
+ # --- Main Tabs for Each Phase ---
705
  tab1, tab2, tab3, tab4 = st.tabs([
706
+ "**Phase 1:** Target Identification",
707
+ "**Phase 2:** Hit Discovery & ADMET",
708
+ "**Phase 3:** Lead Optimization",
709
+ "**Phase 4:** Pre-clinical & RWE"
710
  ])
711
 
712
  # --- Phase 1: Target Identification ---
713
  with tab1:
714
  st.header("Phase 1: Target Identification & Initial Analysis")
715
+ st.markdown("""
716
+ In this initial phase, we identify and analyze a biological target (e.g., a protein) implicated in a disease.
717
+ We fetch its 3D structure and sequence data, then evaluate a set of initial compounds for their drug-like properties.
718
+ """)
719
+
720
+ st.subheader("Inputs & Controls")
721
 
722
+ # Updated PDB ID options
723
+ pdb_options = {
724
+ "Neuraminidase (Influenza - 2HU4)": "2HU4",
725
+ "KRAS G12D (Oncogenic Target - 7XKJ)": "7XKJ", # Bound to MRTX-1133
726
+ "SARS-CoV-2 Mpro (Antiviral Target - 8HUR)": "8HUR", # Bound to Ensitrelvir
727
+ "EGFR Kinase (Cancer Target - 1M17)": "1M17", # Bound to Erlotinib
728
+ }
729
  selected_pdb_name = st.selectbox("Select PDB ID:", options=list(pdb_options.keys()), index=0)
730
  pdb_id_input = pdb_options[selected_pdb_name]
731
+
732
+ # Updated NCBI Protein ID options
733
+ protein_options = {
734
+ "Neuraminidase (P03468)": "P03468", # Influenza A virus (A/PR/8/34)
735
+ "KRAS (P01116)": "P01116", # Human KRAS
736
+ "SARS-CoV-2 Main Protease (P0DTD1)": "P0DTD1", # SARS-CoV-2 Mpro
737
+ "EGFR (P00533)": "P00533", # Human Epidermal Growth Factor Receptor
738
+ }
739
  selected_protein_name = st.selectbox("Select NCBI Protein ID:", options=list(protein_options.keys()), index=0)
740
  protein_id_input = protein_options[selected_protein_name]
741
 
742
  st.markdown("---")
743
+ st.write("**Analyze Sample Compounds:**")
744
  sample_molecules = create_sample_molecules()
745
  selected_molecules = st.multiselect(
746
+ "Select from known drugs:",
747
+ options=list(sample_molecules.keys()),
748
+ default=["Oseltamivir (Influenza)", "Aspirin (Pain/Inflammation)", "Imatinib (Gleevec - Cancer)"] # Adjusted default selection
749
  )
750
 
751
  if st.button("πŸš€ Run Phase 1 Analysis", key="run_p1"):
752
+ with st.spinner("Fetching data and calculating properties..."):
753
  full_log = "--- Phase 1 Analysis Started ---\n"
754
+
755
  pdb_data, log_pdb = fetch_pdb_structure(pdb_id_input)
756
  full_log += log_pdb
757
+ log_fasta = fetch_fasta_sequence(protein_id_input)
758
+ full_log += log_fasta
759
+
760
  smiles_to_analyze = {name: sample_molecules[name] for name in selected_molecules}
761
  properties_df, log_props = calculate_molecular_properties(smiles_to_analyze)
762
  full_log += log_props
763
+
764
  analysis_df, display_df, log_likeness = assess_drug_likeness(properties_df)
765
  full_log += log_likeness
766
+
767
+ protein_view_html, log_3d = visualize_protein_3d(pdb_data, title=f"PDB: {pdb_id_input}")
768
  full_log += log_3d
769
+
770
  dashboard_plot, log_dash = plot_properties_dashboard(analysis_df)
771
  full_log += log_dash
772
+
773
+ full_log += "\n--- Phase 1 Analysis Complete ---"
774
+ st.session_state.log_p1 = full_log
775
+
776
+ st.session_state.results_p1 = {
777
+ 'pdb_data': pdb_data,
778
+ 'protein_view': protein_view_html,
779
+ 'properties_df': display_df,
780
+ 'dashboard': dashboard_plot
781
+ }
782
 
783
+ st.text_area("Status & Logs", st.session_state.log_p1, height=200, key="log_p1_area")
784
 
785
+ st.subheader("Results")
786
+ if not st.session_state.results_p1:
787
+ st.info("Click 'Run Phase 1 Analysis' to generate and display results.")
788
+ else:
789
  res1 = st.session_state.results_p1
790
  p1_tabs = st.tabs(["Protein Structure", "Compound Properties Dashboard"])
791
+
792
  with p1_tabs[0]:
793
  st.subheader(f"3D Structure for PDB ID: {pdb_id_input}")
794
+ if res1.get('protein_view'):
795
+ st.components.v1.html(res1['protein_view'], height=600, scrolling=False)
796
+ else:
797
+ st.warning("Could not display 3D structure. Check PDB ID and logs.")
798
+
799
  with p1_tabs[1]:
800
  st.subheader("Physicochemical Properties Analysis")
801
+ # The data table is now displayed *before* the dashboard.
802
  st.dataframe(res1.get('properties_df', pd.DataFrame()), use_container_width=True, hide_index=True)
803
  if res1.get('dashboard'):
804
  st.bokeh_chart(res1['dashboard'], use_container_width=True)
805
 
806
+
807
  # --- Phase 2: Hit Discovery & ADMET ---
808
  with tab2:
809
  st.header("Phase 2: Virtual Screening & Early ADMET")
810
+ st.markdown("""
811
+ This phase simulates a virtual screening process to identify 'hits' from a larger library of compounds.
812
+ We predict their binding affinity to the target and assess their basic ADMET (Absorption, Distribution,
813
+ Metabolism, Excretion, Toxicity) profiles.
814
+ """)
815
+
816
+ st.subheader("Inputs & Controls")
817
+
818
  p2_molecules = get_phase2_molecules()
819
  st.info(f"A library of {len(p2_molecules)} compounds is ready for screening.")
820
 
821
+ # Updated PDB ID for Interaction options
822
  interaction_pdb_options = {
823
+ "Neuraminidase + Oseltamivir (2HU4)": {"pdb": "2HU4", "ligand": "G39"},
824
+ "KRAS G12C + MRTX-1133 (7XKJ)": {"pdb": "7XKJ", "ligand": "M13"},
825
+ "SARS-CoV-2 Mpro + Ensitrelvir (8HUR)": {"pdb": "8HUR", "ligand": "X77"},
826
+ "EGFR + Erlotinib (1M17)": {"pdb": "1M17", "ligand": "ERL"},
827
  }
828
+ selected_interaction_pdb_name = st.selectbox(
829
+ "Select PDB ID for Interaction:",
830
+ options=list(interaction_pdb_options.keys()),
831
+ index=0 # Default to Neuraminidase
832
+ )
833
  p2_pdb_id = interaction_pdb_options[selected_interaction_pdb_name]["pdb"]
834
  p2_ligand_resn = interaction_pdb_options[selected_interaction_pdb_name]["ligand"]
835
 
836
+ st.write(f"Selected PDB: `{p2_pdb_id}`, Selected Ligand Residue Name: `{p2_ligand_resn}`")
837
+
838
+
839
  if st.button("πŸš€ Run Phase 2 Analysis", key="run_p2"):
840
+ with st.spinner("Running virtual screening and ADMET predictions..."):
841
  full_log = "--- Phase 2 Analysis Started ---\n"
842
+
843
  screening_df, log_screen = simulate_virtual_screening(p2_molecules)
844
  full_log += log_screen
845
  admet_df, log_admet = predict_admet_properties(p2_molecules)
846
  full_log += log_admet
847
+
848
  merged_df = pd.merge(screening_df, admet_df, on="Molecule")
849
+
850
  pdb_data, log_pdb_p2 = fetch_pdb_structure(p2_pdb_id)
851
  full_log += log_pdb_p2
852
+
853
+ interaction_view, log_interact = visualize_protein_ligand_interaction(pdb_data, p2_pdb_id, p2_ligand_resn)
854
  full_log += log_interact
 
 
855
 
856
+ full_log += "\n--- Phase 2 Analysis Complete ---"
857
+ st.session_state.log_p2 = full_log
858
+ st.session_state.results_p2 = {
859
+ 'merged_df': merged_df,
860
+ 'interaction_view': interaction_view
861
+ }
862
+
863
+ st.text_area("Status & Logs", st.session_state.log_p2, height=200, key="log_p2_area")
864
 
865
+ st.subheader("Results")
866
+ if not st.session_state.results_p2:
867
+ st.info("Click 'Run Phase 2 Analysis' to generate and display results.")
868
+ else:
869
  res2 = st.session_state.results_p2
870
  p2_tabs = st.tabs(["Screening & ADMET Results", "Protein-Ligand Interaction"])
871
+
872
  with p2_tabs[0]:
873
+ st.subheader("Virtual Screening & Early ADMET Predictions")
874
  st.dataframe(res2.get('merged_df', pd.DataFrame()), use_container_width=True, hide_index=True)
875
+
876
  with p2_tabs[1]:
877
+ st.subheader(f"Simulated Interaction for PDB {p2_pdb_id} with Ligand {p2_ligand_resn}")
878
+ if res2.get('interaction_view'):
879
+ st.components.v1.html(res2['interaction_view'], height=700, scrolling=False)
880
+ else:
881
+ st.warning("Could not display interaction view. Check inputs and logs.")
882
+
883
  # --- Phase 3: Lead Optimization ---
884
  with tab3:
885
  st.header("Phase 3: Lead Compound Optimization")
886
+ st.markdown("""
887
+ In lead optimization, promising 'hit' compounds are refined to improve their efficacy and safety.
888
+ Here, we analyze a few selected lead candidates, perform more detailed property calculations,
889
+ and predict their toxicity risk using a simulated machine learning model.
890
+ """)
891
+
892
+ st.subheader("Inputs & Controls")
893
+
894
  p3_molecules = get_phase3_molecules()
895
  selected_leads = st.multiselect(
896
+ "Select lead compounds to optimize:",
897
+ options=list(p3_molecules.keys()),
898
+ default=['Oseltamivir (Influenza)', 'Remdesivir (Antiviral)', 'Imatinib (Gleevec - Cancer)'] # Adjusted default selection
899
  )
900
 
901
  if st.button("πŸš€ Run Phase 3 Analysis", key="run_p3"):
902
+ with st.spinner("Analyzing lead compounds and predicting toxicity..."):
903
  full_log = "--- Phase 3 Analysis Started ---\n"
904
+
905
  smiles_to_analyze_p3 = {name: p3_molecules[name] for name in selected_leads}
906
+
907
  comp_props_df, log_comp = calculate_comprehensive_properties(smiles_to_analyze_p3)
908
  full_log += log_comp
909
+
910
  toxicity_df, log_tox = predict_toxicity(comp_props_df)
911
  full_log += log_tox
912
+
913
  final_df = pd.merge(comp_props_df, toxicity_df, on="Compound")
914
+
915
+ visuals = {}
916
+ for name, smiles in smiles_to_analyze_p3.items():
917
+ html_view, log_vis = visualize_molecule_2d_3d(smiles, name)
918
+ visuals[name] = html_view
919
+ full_log += log_vis
920
+
921
+ full_log += "\n--- Phase 3 Analysis Complete ---"
922
+ st.session_state.log_p3 = full_log
923
+ st.session_state.results_p3 = {
924
+ 'final_df': final_df,
925
+ 'visuals': visuals
926
+ }
927
 
928
+ st.text_area("Status & Logs", st.session_state.log_p3, height=200, key="log_p3_area")
929
 
930
+ st.subheader("Results")
931
+ if not st.session_state.results_p3:
932
+ st.info("Click 'Run Phase 3 Analysis' to generate and display results.")
933
+ else:
934
+ # Corrected from results_3 to results_p3
935
  res3 = st.session_state.results_p3
936
  st.subheader("Lead Compound Analysis & Toxicity Prediction")
937
  st.dataframe(res3.get('final_df', pd.DataFrame()), use_container_width=True, hide_index=True)
938
 
939
  st.subheader("2D & 3D Molecular Structures")
940
+ for name, visual_html in res3.get('visuals', {}).items():
941
+ st.components.v1.html(visual_html, height=430, scrolling=False)
942
+
 
 
 
 
 
 
 
 
943
 
944
  # --- Phase 4: Pre-clinical & RWE ---
945
  with tab4:
946
  st.header("Phase 4: Simulated Pre-clinical & Real-World Evidence (RWE)")
947
+ st.markdown("""
948
+ This final phase simulates post-market analysis. We analyze text data for adverse events (pharmacovigilance)
949
+ and present documentation related to the AI models and ethical frameworks that would be required for regulatory submission.
950
+ """)
951
+
952
+ st.subheader("Inputs & Controls")
953
+
954
  rwd_input = st.text_area(
955
  "Enter simulated adverse event report text:",
956
+ "Patient reports include instances of headache, severe nausea, and occasional skin rash. Some noted dizziness after taking the medication.",
957
+ height=150
958
  )
959
 
960
  if st.button("πŸš€ Run Phase 4 Analysis", key="run_p4"):
961
+ with st.spinner("Analyzing real-world data and generating reports..."):
962
  full_log = "--- Phase 4 Analysis Started ---\n"
963
+
964
  reg_df, log_reg = get_regulatory_summary()
965
  full_log += log_reg
966
+
967
  eth_df, log_eth = get_ethical_framework()
968
  full_log += log_eth
969
+
970
  rwd_df, plot_bar, log_rwd = simulate_rwd_analysis(rwd_input)
971
  full_log += log_rwd
972
+ full_log += "\n--- Phase 4 Analysis Complete ---"
973
+ st.session_state.log_p4 = full_log
974
+
975
+ st.session_state.results_p4 = {
976
+ 'rwd_df': rwd_df,
977
+ 'plot_bar': plot_bar,
978
+ 'reg_df': reg_df,
979
+ 'eth_df': eth_df
980
+ }
981
 
982
+ st.text_area("Status & Logs", st.session_state.log_p4, height=200, key="log_p4_area")
983
 
984
+ st.subheader("Results")
985
+ if not st.session_state.results_p4:
986
+ st.info("Click 'Run Phase 4 Analysis' to generate and display results.")
987
+ else:
988
  res4 = st.session_state.results_p4
989
  p4_tabs = st.tabs(["Pharmacovigilance Analysis", "Regulatory & Ethical Frameworks"])
990
+
991
  with p4_tabs[0]:
992
+ st.subheader("Simulated Adverse Event Analysis")
993
  if res4.get('plot_bar'):
994
  st.bokeh_chart(res4['plot_bar'], use_container_width=True)
995
  st.dataframe(res4.get('rwd_df', pd.DataFrame()), use_container_width=True, hide_index=True)
996
+
997
  with p4_tabs[1]:
998
  st.subheader("AI/ML Model Regulatory Summary")
999
  st.dataframe(res4.get('reg_df', pd.DataFrame()), use_container_width=True, hide_index=True)
1000
+
1001
  st.subheader("Ethical AI Framework")
1002
+ st.dataframe(res4.get('eth_df', pd.DataFrame()), use_container_width=True, hide_index=True)
1003
+