Spaces:

alidenewade
/

mol-lang-lab

Sleeping

App Files Files Community

alidenewade committed on Jun 23

Commit

e56ed1f

verified ·

1 Parent(s): 3923798

Update app.py

Browse files

Files changed (1) hide show

app.py +267 -242

app.py CHANGED Viewed

@@ -1,87 +1,20 @@
 import streamlit as st
-import pandas as pd
 from rdkit import Chem
 from rdkit.Chem import Draw, AllChem
-from rdkit.Chem.Draw import rdMolDraw2D
-import py3Dmol
 import io
 import base64
 import logging
-import torch
-from transformers import AutoModelForMaskedLM, AutoTokenizer, pipeline, BitsAndBytesConfig
 # Set up logging to monitor quantization effects
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# --- Page Configuration ---
-st.set_page_config(
-    page_title="Molecule Explorer & Predictor",
-    page_icon="🔬",
-    layout="wide",
-    initial_sidebar_state="collapsed",
-)
-# Custom CSS for a professional, minimalist look (adapted from drug_app.txt)
-def apply_custom_styling():
-    st.markdown(
-        """
-        <style>
-        @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
-        html, body, [class*="st-"] {
-            font-family: 'Roboto', sans-serif;
-        }
-        .stApp {
-            background-color: rgb(28, 28, 28);
-            color: white;
-        }
-        /* Tab styles */
-        .stTabs [data-baseweb="tab-list"] {
-            gap: 24px;
-        }
-        .stTabs [data-baseweb="tab"] {
-            height: 50px;
-            white-space: pre-wrap;
-            background: none;
-            border-radius: 0px;
-            border-bottom: 2px solid #333;
-            padding: 10px 4px;
-            color: #AAA;
-        }
-        .stTabs [data-baseweb="tab"]:hover {
-            background: #222;
-            color: #FFF;
-        }
-        .stTabs [aria-selected="true"] {
-            border-bottom: 2px solid #00A0FF; /* Highlight color for active tab */
-            color: #FFF;
-        }
-        /* Button styles */
-        .stButton>button {
-            border-color: #00A0FF;
-            color: #00A0FF;
-        }
-        .stButton>button:hover {
-            border-color: #FFF;
-            color: #FFF;
-            background-color: #00A0FF;
-        }
-        </style>
-        """,
-        unsafe_allow_html=True
-    )
-apply_custom_styling()
 # --- Quantization Configuration ---
 def get_quantization_config():
     """
@@ -111,10 +44,11 @@ def get_torch_dtype():
     else:
         return torch.float32  # Keep full precision on CPU
-# --- Optimized Model Loading with Streamlit Caching ---
-@st.cache_resource(show_spinner="Loading molecular language model...")
 def load_optimized_models():
-    """Load models with quantization and other optimizations using Streamlit caching."""
     device = "cuda" if torch.cuda.is_available() else "cpu"
     torch_dtype = get_torch_dtype()
     quantization_config = get_quantization_config()
@@ -124,7 +58,7 @@ def load_optimized_models():
     # Model names
     model_name = "seyonec/PubChem10M_SMILES_BPE_450k"
-    # Load tokenizer
     fill_mask_tokenizer = AutoTokenizer.from_pretrained(model_name)
     # Load model with quantization if available
@@ -134,6 +68,7 @@ def load_optimized_models():
     if quantization_config is not None and torch.cuda.is_available(): # Quantization typically for GPU
         model_kwargs["quantization_config"] = quantization_config
         model_kwargs["device_map"] = "auto"
     elif torch.cuda.is_available():
         model_kwargs["device_map"] = "auto" # For non-quantized GPU loading
@@ -141,216 +76,306 @@ def load_optimized_models():
         model_kwargs["device_map"] = None # For CPU
     try:
         fill_mask_model = AutoModelForMaskedLM.from_pretrained(
             model_name,
             **model_kwargs
         )
         fill_mask_model.eval()
         pipeline_device = fill_mask_model.device.index if hasattr(fill_mask_model.device, 'type') and fill_mask_model.device.type == "cuda" else -1
         fill_mask_pipeline = pipeline(
             'fill-mask',
             model=fill_mask_model,
             tokenizer=fill_mask_tokenizer,
-            device=pipeline_device,
         )
         logger.info("Models loaded successfully with optimizations")
         return fill_mask_tokenizer, fill_mask_model, fill_mask_pipeline
     except Exception as e:
         logger.error(f"Error loading optimized models: {e}")
         logger.info("Falling back to standard model loading...")
         return load_standard_models(model_name)
-@st.cache_resource(show_spinner="Loading standard molecular language model...")
-def load_standard_models(model_name="seyonec/PubChem10M_SMILES_BPE_450k"):
-    """Fallback standard model loading without quantization using Streamlit caching."""
     fill_mask_tokenizer = AutoTokenizer.from_pretrained(model_name)
     fill_mask_model = AutoModelForMaskedLM.from_pretrained(model_name)
     device_idx = 0 if torch.cuda.is_available() else -1
     fill_mask_pipeline = pipeline('fill-mask', model=fill_mask_model, tokenizer=fill_mask_tokenizer, device=device_idx)
     if torch.cuda.is_available():
         fill_mask_model.to("cuda")
-    logger.info("Standard models loaded successfully")
     return fill_mask_tokenizer, fill_mask_model, fill_mask_pipeline
-# --- RDKit and Py3Dmol Visualization Functions ---
-def mol_to_svg(mol, size=(400, 300)):
-    """Converts an RDKit molecule object to an SVG image string using default RDKit colors."""
-    if not mol:
-        return None
-    drawer = rdMolDraw2D.MolDraw2DSVG(*size)
-    # Removing custom color settings as per user request to use default RDKit colors
-    # drawer.drawOptions().clearBackground = False # Keep background transparent/dark
-    # drawer.drawOptions().addStereoAnnotation = True
-    # drawer.drawOptions().baseFontSize = 0.8
-    # # Set dark theme colors for RDKit drawing - REMOVED AS PER USER REQUEST
-    # atom_colors = {
-    #     6: (0.8, 0.8, 0.8),  # Carbon (light gray)
-    #     7: (0.2, 0.5, 1.0),  # Nitrogen (blue)
-    #     8: (1.0, 0.2, 0.2),  # Oxygen (red)
-    #     9: (0.2, 0.8, 0.2),  # Fluorine (green)
-    #     15: (1.0, 0.5, 0.0), # Phosphorus (orange)
-    #     16: (1.0, 0.8, 0.0), # Sulfur (yellow)
-    #     17: (0.2, 0.7, 0.2), # Chlorine (dark green)
-    #     35: (0.5, 0.2, 0.8), # Bromine (purple)
-    #     53: (0.8, 0.2, 0.5), # Iodine (pink/magenta)
-    # }
-    # # Set default atom color
-    # drawer.drawOptions().setAtomColor(Chem.rdatomicnumlist.Get): (0.8, 0.8, 0.8) # Default to light gray for unknown atoms
-    # for atom_num, color in atom_colors.items():
-    #     drawer.drawOptions().setAtomColor(atom_num, color)
-    # drawer.drawOptions().bondColor = (0.7, 0.7, 0.7) # Bond color (medium gray)
-    # drawer.drawOptions().highlightColour = (0.2, 0.6, 1.0) # Highlight color (blue)
-    drawer.DrawMolecule(mol)
-    drawer.FinishDrawing()
-    svg = drawer.GetDrawingText()
-    return svg
-def mol_to_sdf(mol):
-    """Converts an RDKit molecule object to an SDF string."""
-    if not mol:
         return None
-    # Add hydrogens to the molecule
-    mol_with_h = Chem.AddHs(mol)
-    # Generate 3D coordinates using ETKDGv3, a common conformer generation method
-    # MaxAttempts is increased for robustness, randomSeed for reproducibility
     try:
-        AllChem.EmbedMolecule(mol_with_h, AllChem.ETKDGv3(), maxAttempts=50, randomSeed=42)
-        # Optimize 3D coordinates using Universal Force Field (UFF)
-        AllChem.UFFOptimizeMolecule(mol_with_h)
-        sdf_string = Chem.MolToMolBlock(mol_with_h)
-        return sdf_string
-    except Exception as e:
-        logger.error(f"Error generating 3D coordinates for SMILES: {Chem.MolToSmiles(mol)} - {e}")
         return None
-def visualize_molecule_3d(mol_sdf: str, width='100%', height=400):
     """
-    Generates an interactive 3D molecule visualization using py3Dmol.
-    Accepts an SDF string.
     """
-    if not mol_sdf:
-        return None
     try:
-        viewer = py3Dmol.view(width=width, height=height)
-        viewer.setBackgroundColor('#1C1C1C') # Dark background
-        viewer.addModel(mol_sdf, "sdf")
-        viewer.setStyle({'stick':{}, 'sphere':{'radius':0.3}}) # Stick and Sphere representation
-        viewer.zoomTo()
-        html_view = viewer._make_html()
-        return html_view
     except Exception as e:
-        st.error(f"Error generating 3D visualization: {e}")
-        return None
-# --- Main Streamlit Application Layout ---
-st.title("🔬 Molecule Explorer & Predictor")
-# Initialize session state for consistent data across reruns
-if 'tokenizer' not in st.session_state:
-    st.session_state.tokenizer, st.session_state.model, st.session_state.pipeline = load_optimized_models()
-tokenizer = st.session_state.tokenizer
-model = st.session_state.model
-fill_mask_pipeline = st.session_state.pipeline
-tab1, tab2 = st.tabs(["Molecule Viewer (2D & 3D)", "Masked SMILES Predictor"])
-with tab1:
-    st.header("Visualize Molecules in 2D and 3D")
-    smiles_input = st.text_input("Enter SMILES string:", "CCO", help="e.g., CCO (ethanol), C1=CC=CC=C1 (benzene)")
-    if st.button("View Molecule"):
-        if smiles_input:
-            mol = Chem.MolFromSmiles(smiles_input)
-            if mol:
-                st.subheader("2D Structure")
-                svg = mol_to_svg(mol)
-                if svg:
-                    st.image(svg, use_column_width=True)
-                else:
-                    st.warning("Could not generate 2D image.")
-                st.subheader("3D Structure (Interactive)")
-                sdf_string = mol_to_sdf(mol)
-                if sdf_string:
-                    html_3d = visualize_molecule_3d(sdf_string)
-                    if html_3d:
-                        st.components.v1.html(html_3d, width=700, height=500, scrolling=False)
                     else:
-                        st.warning("Could not generate 3D visualization.")
-                else:
-                    st.warning("Could not generate 3D SDF data.")
-            else:
-                st.error("Invalid SMILES string. Please enter a valid chemical structure.")
-        else:
-            st.info("Please enter a SMILES string to view the molecule.")
 with tab2:
-    st.header("Masked SMILES Prediction")
-    masked_smiles_input = st.text_input(
-        "Enter masked SMILES string (use `<mask>` for the masked token):",
-        "C1=CC=CC<mask>C1",
-        help="Example: 'C1=CC=CC<mask>C1' (masked benzene), 'CCO<mask>C' (masked ether)"
-    )
-    top_k_predictions = st.slider("Number of predictions to show:", 1, 10, 5)
-    if st.button("Predict Masked Token"):
-        if masked_smiles_input and "<mask>" in masked_smiles_input:
-            try:
-                # Perform prediction using the loaded pipeline
-                predictions = fill_mask_pipeline(masked_smiles_input, top_k=top_k_predictions)
-                prediction_data = []
-                for pred in predictions:
-                    token_str = pred['token_str']
-                    sequence = pred['sequence']
-                    score = pred['score']
-                    mol = Chem.MolFromSmiles(sequence)
-                    img_svg = None
-                    if mol:
-                        img_svg = mol_to_svg(mol, size=(200,150)) # Smaller image for table
-                    prediction_data.append({
-                        "Predicted Token": token_str,
-                        "Full SMILES": sequence,
-                        "Confidence Score": f"{score:.4f}",
-                        "Structure SVG": img_svg # Store SVG string
-                    })
-                df_predictions = pd.DataFrame(prediction_data)
-                st.subheader("Predictions:")
-                # Create a version of the dataframe without the SVG for initial display
-                display_df = df_predictions.drop(columns=["Structure SVG"])
-                st.dataframe(display_df, use_container_width=True, hide_index=True)
-                st.subheader("Predicted Structures:")
-                # Determine the number of columns based on the number of predictions, up to a max
-                num_cols = min(len(df_predictions), 5) # Display up to 5 images per row
-                cols = st.columns(num_cols)
-                for i, row in df_predictions.iterrows():
-                    with cols[i % num_cols]: # Distribute images into columns
-                        st.markdown(f"**{row['Predicted Token']}** (Score: {row['Confidence Score']})")
-                        if row['Structure SVG']:
-                            st.image(row['Structure SVG'], use_column_width='auto')
-                        else:
-                            st.write("*(Invalid SMILES)*")
-            except Exception as e:
-                st.error(f"An error occurred during prediction: {e}")
-                st.info("Please ensure your masked SMILES is valid and contains `<mask>`.")
-        else:
-            st.info("Please enter a masked SMILES string (e.g., `C1=CC=CC<mask>C1`).")

+# app.py
 import streamlit as st
+import torch
+from transformers import AutoModelForMaskedLM, AutoTokenizer, pipeline, BitsAndBytesConfig
 from rdkit import Chem
 from rdkit.Chem import Draw, AllChem
+from rdkit.Chem.Draw import MolToImage
+import pandas as pd
 import io
 import base64
 import logging
+import py3Dmol
 # Set up logging to monitor quantization effects
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 # --- Quantization Configuration ---
 def get_quantization_config():
     """
     else:
         return torch.float32  # Keep full precision on CPU
+# --- Optimized Model Loading ---
+@st.cache_resource
 def load_optimized_models():
+    """Load models with quantization and other optimizations.
+    Uses st.cache_resource to avoid reloading models on every rerun."""
     device = "cuda" if torch.cuda.is_available() else "cpu"
     torch_dtype = get_torch_dtype()
     quantization_config = get_quantization_config()
     # Model names
     model_name = "seyonec/PubChem10M_SMILES_BPE_450k"
+    # Load tokenizer (doesn't need quantization)
     fill_mask_tokenizer = AutoTokenizer.from_pretrained(model_name)
     # Load model with quantization if available
     if quantization_config is not None and torch.cuda.is_available(): # Quantization typically for GPU
         model_kwargs["quantization_config"] = quantization_config
+        # device_map="auto" is often used with bitsandbytes for automatic distribution
         model_kwargs["device_map"] = "auto"
     elif torch.cuda.is_available():
         model_kwargs["device_map"] = "auto" # For non-quantized GPU loading
         model_kwargs["device_map"] = None # For CPU
     try:
+        # Masked LM Model
         fill_mask_model = AutoModelForMaskedLM.from_pretrained(
             model_name,
             **model_kwargs
         )
+        # Set model to evaluation mode for inference
         fill_mask_model.eval()
+        # Create optimized pipeline
+        # Let pipeline infer device from model if possible, or set based on model's device
         pipeline_device = fill_mask_model.device.index if hasattr(fill_mask_model.device, 'type') and fill_mask_model.device.type == "cuda" else -1
         fill_mask_pipeline = pipeline(
             'fill-mask',
             model=fill_mask_model,
             tokenizer=fill_mask_tokenizer,
+            device=pipeline_device, # Use model's device
+            # torch_dtype=torch_dtype # Pipeline might infer this or it might conflict
         )
         logger.info("Models loaded successfully with optimizations")
         return fill_mask_tokenizer, fill_mask_model, fill_mask_pipeline
     except Exception as e:
         logger.error(f"Error loading optimized models: {e}")
+        # Fallback to standard loading
         logger.info("Falling back to standard model loading...")
         return load_standard_models(model_name)
+def load_standard_models(model_name):
+    """Fallback standard model loading without quantization.
+
+    Loads the tokenizer and masked-LM weights for ``model_name`` with the
+    library defaults (no quantization or device_map arguments) and wraps them
+    in a 'fill-mask' pipeline.
+
+    This function is not decorated with st.cache_resource itself; in this file
+    it is only reached from the except-branch of load_optimized_models(),
+    which is cached.
+
+    Returns:
+        Tuple of (tokenizer, model, fill-mask pipeline).
+    """
     fill_mask_tokenizer = AutoTokenizer.from_pretrained(model_name)
     fill_mask_model = AutoModelForMaskedLM.from_pretrained(model_name)
+    # Determine device for standard loading: index 0 when CUDA is available, otherwise -1
     device_idx = 0 if torch.cuda.is_available() else -1
     fill_mask_pipeline = pipeline('fill-mask', model=fill_mask_model, tokenizer=fill_mask_tokenizer, device=device_idx)
+    # NOTE(review): the pipeline was already given a device above, so this explicit
+    # move looks redundant - confirm before removing.
     if torch.cuda.is_available():
         fill_mask_model.to("cuda")
     return fill_mask_tokenizer, fill_mask_model, fill_mask_pipeline
+# Load models with optimizations.
+# This runs at module level, i.e. on every script execution; load_optimized_models()
+# is wrapped in st.cache_resource, so reruns reuse the already-loaded objects.
+# NOTE(review): this call happens before st.set_page_config() further down in the
+# file. Streamlit documents set_page_config as having to be the first Streamlit
+# command - confirm the cache spinner shown on a cold start does not violate that.
+fill_mask_tokenizer, fill_mask_model, fill_mask_pipeline = load_optimized_models()
+# --- Memory Management Utilities ---
+def clear_gpu_cache():
+    """Empty the CUDA cache when a GPU is available; otherwise do nothing."""
+    if not torch.cuda.is_available():
+        return
+    torch.cuda.empty_cache()
+# --- Helper Functions from Notebook (adapted) ---
+def get_mol(smiles):
+    """Convert a SMILES string to an RDKit Mol object and try to Kekulize it.
+
+    Args:
+        smiles: SMILES text to parse.
+
+    Returns:
+        The parsed molecule, or None when RDKit cannot parse ``smiles``.
+        Kekulization is best-effort: if it fails, the molecule is returned
+        as parsed.
+    """
+    mol = Chem.MolFromSmiles(smiles)
+    if mol is None:
         return None
     try:
+        Chem.Kekulize(mol)
+    except Exception as exc:  # Kekulization can fail for some structures
+        # Catch Exception rather than using a bare except so KeyboardInterrupt
+        # and SystemExit still propagate; keep the failure visible in the logs.
+        logger.debug("Kekulization failed for %r: %s", smiles, exc)
+    return mol
+def find_matches_one(mol, submol_smarts):
+    """Return every substructure match of a SMARTS pattern in ``mol``.
+
+    An empty list is returned when the molecule or the pattern is missing,
+    or when the SMARTS text does not yield a usable query; otherwise the
+    result of ``mol.GetSubstructMatches`` is returned unchanged.
+    """
+    if not (mol and submol_smarts):
+        return []
+    pattern = Chem.MolFromSmarts(submol_smarts)
+    return mol.GetSubstructMatches(pattern) if pattern else []
+def get_image_with_highlight(mol, atomset=None, size=(300, 300)):
+    """Draw ``mol`` as a 2D image, optionally highlighting the atoms in ``atomset``.
+
+    Returns None when ``mol`` is None. Entries of ``atomset`` are coerced to
+    int; if that raises ValueError, a warning is logged and only entries whose
+    string form is all digits are kept.
+    """
+    if mol is None:
+        return None
+
+    green = (0, 1, 0, 0.5)  # Green with some transparency
+
+    atom_ids = []
+    if atomset:
+        try:
+            atom_ids = [int(entry) for entry in atomset]
+        except ValueError:
+            logger.warning(f"Invalid atom in atomset: {atomset}. Proceeding without highlighting problematic atoms.")
+            # Keep only the entries that look like plain non-negative integers
+            atom_ids = [int(entry) for entry in atomset if str(entry).isdigit()]
+
+    # An empty atom_ids list naturally produces an empty colour mapping.
+    # NOTE(review): confirm the installed RDKit's MolToImage honours the
+    # highlightAtomColors keyword rather than silently ignoring it.
+    colour_by_atom = {atom_id: green for atom_id in atom_ids}
+    return MolToImage(
+        mol,
+        size=size,
+        fitImage=True,
+        highlightAtoms=atom_ids,
+        highlightAtomColors=colour_by_atom,
+    )
+def mol_to_sdf_string(mol):
+    """Generate 3D coordinates for an RDKit Mol and return them as MolBlock text.
+
+    The input molecule is not modified: embedding and force-field optimisation
+    are performed on a copy.
+
+    Args:
+        mol: RDKit molecule, or None.
+
+    Returns:
+        The MolBlock string of the embedded, UFF-optimised copy, or None when
+        ``mol`` is None or when 3D coordinate generation fails.
+    """
+    if mol is None:
         return None
+    # Copy first so the caller's molecule does not silently gain a conformer.
+    mol_3d = Chem.Mol(mol)
+    try:
+        # EmbedMolecule reports failure by returning -1 instead of raising;
+        # optimising a molecule without a conformer would then blow up.
+        if AllChem.EmbedMolecule(mol_3d, AllChem.ETKDG()) == -1:
+            logger.warning("Could not embed 3D coordinates for molecule")
+            return None
+        AllChem.UFFOptimizeMolecule(mol_3d)
+    except Exception as exc:
+        logger.error("Error generating 3D coordinates: %s", exc)
+        return None
+    # NOTE(review): RDKit recommends Chem.AddHs() before embedding for better
+    # geometry; left out here to keep the rendered atoms the same as before.
+    return Chem.MolToMolBlock(mol_3d)
+def render_mol_3d(sdf_string, width=300, height=300):
+    """Render a 3D molecule with py3Dmol and return the viewer as an HTML string.
+
+    Args:
+        sdf_string: Molecule text passed to the viewer with format 'sdf', or None.
+        width: Viewer width handed to py3Dmol.view.
+        height: Viewer height handed to py3Dmol.view.
+
+    Returns:
+        HTML markup for embedding in Streamlit, or an empty string when
+        ``sdf_string`` is None.
+    """
+    if sdf_string is None:
+        return ""
+    viewer = py3Dmol.view(width=width, height=height)
+    viewer.addModel(sdf_string, 'sdf')
+    viewer.setStyle({'stick':{}}) # Display as sticks
+    viewer.zoomTo()
+    # py3Dmol forwards unknown method names to the JavaScript viewer and returns
+    # the view object, so `viewer.to_html()` does not yield markup. Use
+    # _make_html(), the call the earlier version of this app relied on, to get
+    # the embeddable HTML string.
+    return viewer._make_html()
+# --- Streamlit Interface Functions ---
+def predict_and_visualize_masked_smiles(smiles_mask, substructure_smarts_highlight="CC=CC"):
     """
+    Predicts completions for a masked SMILES string and prepares their visualizations.
+
+    Requests the top 10 candidates from the fill-mask pipeline and keeps the first 5
+    that get_mol() can parse, building a 2D image and a 3D viewer HTML string for each.
+
+    Args:
+        smiles_mask: SMILES text that must contain the tokenizer's mask token.
+        substructure_smarts_highlight: SMARTS pattern; the atoms of its first match
+            are highlighted in the 2D image. A falsy value disables highlighting.
+
+    Returns:
+        A 4-tuple (results, images_2d, images_3d, status):
+        a DataFrame with "Predicted SMILES" and "Score" columns, the list of 2D
+        images and the list of 3D HTML strings (both padded with None to length 5),
+        and a status message. When the mask token is missing or the pipeline raises,
+        the DataFrame is empty, both lists are [None]*5, and the returned message is
+        also shown through st.error.
     """
+    # Reject input without a mask token before touching the model
+    if fill_mask_tokenizer.mask_token not in smiles_mask:
+        st.error("Error: Input SMILES must contain a mask token (e.g., <mask>).")
+        return pd.DataFrame(), [None]*5, [None]*5, "Error: Input SMILES must contain a mask token (e.g., <mask>)."
     try:
+        with torch.no_grad():
+            predictions = fill_mask_pipeline(smiles_mask, top_k=10)
     except Exception as e:
+        clear_gpu_cache()
+        st.error(f"Error during prediction: {str(e)}")
+        return pd.DataFrame(), [None]*5, [None]*5, f"Error during prediction: {str(e)}"
+    results_data = []
+    image_2d_list = []
+    image_3d_list = []
+    valid_predictions_count = 0
+    # Walk the candidates in pipeline order and stop after 5 parseable molecules
+    for pred in predictions:
+        if valid_predictions_count >= 5:
+            break
+        predicted_smiles = pred['sequence']
+        score = pred['score']
+        mol = get_mol(predicted_smiles)
+        if mol:
+            results_data.append({"Predicted SMILES": predicted_smiles, "Score": f"{score:.4f}"})
+            atom_matches_indices = []
+            if substructure_smarts_highlight:
+                matches = find_matches_one(mol, substructure_smarts_highlight)
+                if matches:
+                    atom_matches_indices = list(matches[0]) # Highlight first match
+            img_2d = get_image_with_highlight(mol, atomset=atom_matches_indices)
+            image_2d_list.append(img_2d)
+            # For 3D, we need an SDF string
+            # NOTE(review): nothing here catches an exception raised while building
+            # the 3D view, so one failing candidate aborts the whole request.
+            sdf_string = mol_to_sdf_string(mol)
+            img_3d_html = render_mol_3d(sdf_string, width=300, height=300)
+            image_3d_list.append(img_3d_html)
+            valid_predictions_count += 1
+    # Pad image lists if fewer than 5 valid predictions
+    # (both lists always grow together, so checking one length is enough)
+    while len(image_2d_list) < 5:
+        image_2d_list.append(None)
+        image_3d_list.append(None)
+    df_results = pd.DataFrame(results_data)
+    clear_gpu_cache()
+    status_message = "Prediction successful." if valid_predictions_count > 0 else "No valid molecules found for top predictions."
+    return df_results, image_2d_list, image_3d_list, status_message
+def display_molecule_with_3d(smiles_string):
+    """
+    Builds the 2D image and the 3D viewer HTML for a single SMILES string.
+
+    Returns a (image_2d, viewer_html, status) tuple. The first two entries are
+    None when the input is empty or cannot be parsed by get_mol().
+    """
+    if not smiles_string:
+        failure = "Please enter a SMILES string."
+    else:
+        molecule = get_mol(smiles_string)
+        failure = "Invalid SMILES string." if molecule is None else None
+    if failure is not None:
+        return None, None, failure
+    # Draw the 2D picture first, then derive the 3D view from the same molecule
+    picture = MolToImage(molecule, size=(400, 400), fitImage=True)
+    viewer_html = render_mol_3d(mol_to_sdf_string(molecule), width=400, height=400)
+    return picture, viewer_html, "Molecule displayed."
+# --- Streamlit UI Definition ---
+# Set wide mode and background color
+# NOTE(review): load_optimized_models() is called earlier in this file, before this
+# set_page_config(); Streamlit documents set_page_config as having to be the first
+# Streamlit command - confirm this ordering is accepted by the deployed version.
+st.set_page_config(layout="wide")
+# Inject a dark theme through raw CSS; unsafe_allow_html is required for the <style> tag.
+# NOTE(review): selectors such as .css-1d391kg look like generated class names and
+# may not survive a Streamlit upgrade - verify against the running app.
+st.markdown(
+    """
+    <style>
+    .stApp {
+        background-color: rgb(28,28,28);
+        color: white; /* Ensure text is visible on dark background */
+    }
+    .stDataFrame {
+        color: black; /* Default DataFrame text color */
+    }
+    h1, h2, h3, h4, h5, h6, .stMarkdown {
+        color: white;
+    }
+    .css-1d391kg, .css-1dp5dn1 { /* Target Streamlit's main content and sidebar */
+        color: white;
+    }
+    .streamlit-expanderContent {
+        background-color: rgb(40,40,40); /* Slightly lighter background for expanders */
+        border-radius: 10px;
+        padding: 10px;
+    }
+    /* Style for text inputs and buttons */
+    .stTextInput>div>div>input {
+        background-color: rgb(50,50,50);
+        color: white;
+        border-radius: 5px;
+        border: 1px solid rgb(70,70,70);
+    }
+    .stButton>button {
+        background-color: rgb(0,128,255); /* Blue button */
+        color: white;
+        border-radius: 8px;
+        padding: 10px 20px;
+        border: none;
+        transition: background-color 0.3s ease;
+    }
+    .stButton>button:hover {
+        background-color: rgb(0,100,200);
+    }
+    </style>
+    """,
+    unsafe_allow_html=True
+)
+st.title("ChemBERTa SMILES Utilities Dashboard")
+# Two tabs: masked-token prediction first, plain molecule viewer second
+tab1, tab2 = st.tabs(["Masked SMILES Prediction", "Molecule Viewer"])
+# Tab 1: predict completions for a masked SMILES string and show each valid
+# candidate as a 2D image next to an interactive 3D view.
+with tab1:
+    st.markdown("Enter a SMILES string with a `<mask>` token (e.g., `C1=CC=CC<mask>C1`) to predict possible completions.")
+    col1, col2 = st.columns([2, 1])
+    with col1:
+        smiles_input_masked = st.text_input("SMILES String with Mask", value="C1=CC=CC<mask>C1")
+    with col2:
+        substructure_input = st.text_input("Substructure to Highlight (SMARTS)", value="C=C")
+    if st.button("Predict and Visualize", key="predict_button"):
+        with st.spinner("Predicting and visualizing..."):
+            df_predictions, img_2d_list, img_3d_list, status_msg = predict_and_visualize_masked_smiles(
+                smiles_input_masked, substructure_input
+            )
+            # On failure the helper has already called st.error with the same text,
+            # so the message appears twice in that case.
+            st.write(status_msg)
+            if not df_predictions.empty:
+                st.subheader("Top Predictions & Scores")
+                st.dataframe(df_predictions, use_container_width=True)
+                st.subheader("Predicted Molecule Visualizations (Top 5 Valid)")
+                # Both image lists are padded to exactly 5 entries by the helper
+                for i in range(5):
+                    if img_2d_list[i] is not None:
+                        st.markdown(f"**Prediction {i+1}**")
+                        cols_img = st.columns(2)
+                        with cols_img[0]:
+                            # NOTE(review): use_column_width may be deprecated in newer
+                            # Streamlit releases - check the pinned version.
+                            st.image(img_2d_list[i], caption=f"2D Prediction {i+1}", use_column_width=True)
+                        with cols_img[1]:
+                            st.components.v1.html(img_3d_list[i], height=300)
                     else:
+                        # NOTE(review): None entries appear to be padding only, which sits at
+                        # indices >= len(df_predictions); this message looks unreachable - confirm.
+                        if i < len(df_predictions): # Only show 'No visualization' if there was a prediction attempt
+                             st.markdown(f"**Prediction {i+1}**: No visualization available (invalid SMILES or error).")
+# Tab 2: show the 2D image and 3D view for a single user-supplied SMILES string.
 with tab2:
+    st.markdown("Enter a SMILES string to display its 2D and 3D structure.")
+    smiles_input_viewer = st.text_input("SMILES String", value="C1=CC=CC=C1", key="viewer_smiles_input")
+    if st.button("View Molecule", key="view_button"):
+        with st.spinner("Displaying molecule..."):
+            img_2d_viewer, img_3d_viewer_html, status_viewer_msg = display_molecule_with_3d(smiles_input_viewer)
+            st.write(status_viewer_msg)
+            # The helper returns None for the image when the input is empty or invalid
+            if img_2d_viewer is not None:
+                cols_viewer = st.columns(2)
+                with cols_viewer[0]:
+                    st.image(img_2d_viewer, caption="2D Molecule Structure", use_column_width=True)
+                with cols_viewer[1]:
+                    st.components.v1.html(img_3d_viewer_html, height=400)
+            else:
+                st.warning("Could not display molecule. Please check the SMILES string.")