Spaces:

alidenewade
/

mol-lang-lab

Running

App Files Files Community

alidenewade committed on Jun 23

Commit

c9fddab

verified ·

1 Parent(s): 45d1bdf

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -16

app.py CHANGED Viewed

@@ -3,13 +3,17 @@ import streamlit as st
 import torch
 from transformers import AutoModelForMaskedLM, AutoTokenizer, pipeline, BitsAndBytesConfig
 from rdkit import Chem
-from rdkit.Chem import Draw, AllChem
 import pandas as pd
 import py3Dmol
 import re
 import logging
-# Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -83,6 +87,8 @@ apply_custom_styling()
 # --- Model Loading (from mol_app) ---
 @st.cache_resource(show_spinner="Loading ChemBERTa model...")
 def load_optimized_models():
     """Load models with quantization and other optimizations."""
@@ -108,6 +114,7 @@ def load_optimized_models():
         model_kwargs["quantization_config"] = quantization_config
         model_kwargs["device_map"] = "auto"
     model = AutoModelForMaskedLM.from_pretrained(model_name, **model_kwargs)
     pipe = pipeline(
@@ -126,6 +133,8 @@ fill_mask_pipeline, tokenizer = load_optimized_models()
 def get_mol(smiles):
     """Converts SMILES to RDKit Mol object."""
     mol = Chem.MolFromSmiles(smiles)
     if mol:
         try:
@@ -192,7 +201,7 @@ def visualize_molecule_2d_3d(smiles: str, name: str, substructure_smarts=""):
         AllChem.EmbedMolecule(mol_3d, randomSeed=42)
         try:
             AllChem.MMFFOptimizeMolecule(mol_3d)
-        except:
             AllChem.ETKDGv3().Embed(mol_3d)
         sdf_data = Chem.MolToMolBlock(mol_3d)
@@ -275,7 +284,7 @@ def predict_and_generate_visualizations(smiles_mask, substructure_smarts):
 # --- Streamlit Interface ---
 st.title("🧪 ChemBERTa SMILES Utilities")
 st.markdown("""
-Enter a SMILES string with a `<mask>` token (e.g., `C1=CC=CC<mask>C1`) to predict possible completions.
 The model will generate the most likely atoms or fragments to fill the mask.
 """)
@@ -301,25 +310,40 @@ with tab1:
         submit_button = st.form_submit_button("🚀 Predict and Visualize", use_container_width=True)
-    if 'results_df' not in st.session_state or submit_button:
-        if submit_button or 'results_df' not in st.session_state:
-            with st.spinner("Running predictions... This may take a moment."):
-                df, html, log = predict_and_generate_visualizations(smiles_input_masked, substructure_input)
-                st.session_state.results_df = df
-                st.session_state.results_html = html
-                st.session_state.status_log = log
     st.subheader("Top Predictions & Scores")
     if 'results_df' in st.session_state and not st.session_state.results_df.empty:
-        st.dataframe(st.session_state.results_df, use_container_width=True, hide_index=True)
-        st.subheader("Predicted Molecule Visualizations (Top 5 Valid)")
-        st.components.v1.html(st.session_state.results_html, height=1850, scrolling=True)
     else:
         st.info("No valid predictions to display. Try a different input.")
     with st.expander("Show Logs"):
         if 'status_log' in st.session_state:
-            st.text_area("", st.session_state.status_log, height=200, key="log_area_pred")
 with tab2:
     st.header("Molecule Viewer")
@@ -340,4 +364,5 @@ with tab2:
     with st.expander("Show Logs"):
         if 'viewer_log' in st.session_state:
-            st.text_area("", st.session_state.viewer_log, height=100, key="log_area_view")

 import torch
 from transformers import AutoModelForMaskedLM, AutoTokenizer, pipeline, BitsAndBytesConfig
 from rdkit import Chem
+from rdkit.Chem import Draw, AllChem, rdBase
 import pandas as pd
 import py3Dmol
 import re
 import logging
+# --- Setup ---
+# Suppress RDKit console output for cleaner logs
+rdBase.DisableLog('rdApp.error')
+# Set up Python logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 # --- Model Loading (from mol_app) ---
+# NOTE: The "missing ScriptRunContext" warnings in the logs are expected when not
+# running via the 'streamlit run' command. They can be safely ignored.
 @st.cache_resource(show_spinner="Loading ChemBERTa model...")
 def load_optimized_models():
     """Load models with quantization and other optimizations."""
         model_kwargs["quantization_config"] = quantization_config
         model_kwargs["device_map"] = "auto"
+    # The "Some weights of the model were not used" warning is expected and normal.
     model = AutoModelForMaskedLM.from_pretrained(model_name, **model_kwargs)
     pipe = pipeline(
 def get_mol(smiles):
     """Converts SMILES to RDKit Mol object."""
+    # The SMILES Parse Errors in logs are expected; RDKit warns about invalid
+    # molecules generated by the model, which this function handles gracefully.
     mol = Chem.MolFromSmiles(smiles)
     if mol:
         try:
         AllChem.EmbedMolecule(mol_3d, randomSeed=42)
         try:
             AllChem.MMFFOptimizeMolecule(mol_3d)
+        except Exception: # Fallback if MMFF fails
             AllChem.ETKDGv3().Embed(mol_3d)
         sdf_data = Chem.MolToMolBlock(mol_3d)
 # --- Streamlit Interface ---
 st.title("🧪 ChemBERTa SMILES Utilities")
 st.markdown("""
+Enter a SMILES string with a `<mask>` token to predict possible completions.
 The model will generate the most likely atoms or fragments to fill the mask.
 """)
         submit_button = st.form_submit_button("🚀 Predict and Visualize", use_container_width=True)
+    # --- Robust Session State Management ---
+    # This ensures the app loads with default predictions on the very first run,
+    # and only updates when the user clicks the button.
+    # The "Session state does not function" warning in logs is due to the execution
+    # environment and can be ignored.
+    if 'app_initialized' not in st.session_state:
+        with st.spinner("Running initial prediction..."):
+            df, html, log = predict_and_generate_visualizations(smiles_input_masked, substructure_input)
+            st.session_state.results_df = df
+            st.session_state.results_html = html
+            st.session_state.status_log = log
+            st.session_state.app_initialized = True
+    if submit_button:
+        with st.spinner("Running predictions... This may take a moment."):
+            df, html, log = predict_and_generate_visualizations(smiles_input_masked, substructure_input)
+            st.session_state.results_df = df
+            st.session_state.results_html = html
+            st.session_state.status_log = log
     st.subheader("Top Predictions & Scores")
     if 'results_df' in st.session_state and not st.session_state.results_df.empty:
+        st.dataframe(st.session_state.results_df, use_container_width=True, hide_index=True)
     else:
         st.info("No valid predictions to display. Try a different input.")
+    st.subheader("Predicted Molecule Visualizations (Top 5 Valid)")
+    if 'results_html' in st.session_state and st.session_state.results_html:
+        st.components.v1.html(st.session_state.results_html, height=1850, scrolling=True)
     with st.expander("Show Logs"):
         if 'status_log' in st.session_state:
+            # FIX: Added a label to st.text_area to resolve the accessibility warning.
+            st.text_area(label="Prediction Logs", value=st.session_state.status_log, height=200, key="log_area_pred")
 with tab2:
     st.header("Molecule Viewer")
     with st.expander("Show Logs"):
         if 'viewer_log' in st.session_state:
+            # FIX: Added a label to st.text_area to resolve the accessibility warning.
+            st.text_area(label="Viewer Logs", value=st.session_state.viewer_log, height=100, key="log_area_view")