devjas1 committed
Commit: 40a522b
Parent: c8f5637

(FIX/FEATURE): Enhance app.py — improve error handling, stabilize UI, and add logging for analysis
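The UI-stability part of this change moves input handling into `on_change` callbacks that persist the uploaded or selected spectrum text in `st.session_state`, and wraps the Run button in an `st.form` so the button press and the cached input arrive in the same rerun. Below is a minimal, self-contained sketch of that pattern; the widget keys and the trivial "analysis" step are illustrative placeholders, not the app's actual names.

import streamlit as st

def on_upload_change():
    # Persist the uploaded file's text once so later reruns can reuse it
    # without re-reading a possibly stale buffer.
    up = st.session_state.get("demo_upload")
    if up is not None:
        st.session_state["demo_text"] = up.read().decode("utf-8")

st.file_uploader(
    "Raman spectrum (.txt)", type="txt",
    key="demo_upload", on_change=on_upload_change,
)

with st.form("demo_form"):
    submitted = st.form_submit_button(
        "Run Analysis", disabled="demo_text" not in st.session_state
    )

if submitted and "demo_text" in st.session_state:
    # Placeholder for the real parse / resample / inference steps.
    st.write(f"{len(st.session_state['demo_text'].splitlines())} data lines ready")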

Files changed (1):
  app.py  (+195, -106)
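On the error-handling side, the diff below splits weight loading into a cached helper that receives the weight file's modification time, so a changed file on disk can be detected and reloaded. A rough sketch of that caching idea, assuming a plain PyTorch nn.Module; the WEIGHTS_PATH constant and attach_weights helper are hypothetical names, not the app's:

import os
import torch
import streamlit as st

WEIGHTS_PATH = "model_weights/demo_model.pth"  # hypothetical path

@st.cache_data
def cached_state_dict(mtime: float, path: str):
    # mtime is unused in the body; it only widens the cache key so a
    # newer file on disk invalidates the cached state dict.
    return torch.load(path, map_location="cpu")

def attach_weights(model: torch.nn.Module):
    # Hypothetical helper: load weights if present, otherwise keep the
    # random initialization (demo mode), then switch to eval mode.
    if os.path.exists(WEIGHTS_PATH):
        state = cached_state_dict(os.path.getmtime(WEIGHTS_PATH), WEIGHTS_PATH)
        model.load_state_dict(state, strict=True)
    model.eval()
    return model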
app.py CHANGED
@@ -5,6 +5,12 @@ This is an adapted version of the Streamlit app optimized for Hugging Face Space
It maintains all the functionality of the original app while being self-contained and cloud-ready.
"""

+ BUILD_LABEL = "proof-2025-08-24-01"
+ import os, streamlit as st, sys
+ st.sidebar.caption(
+     f"Build: {BUILD_LABEL} | __file__: {__file__} | cwd: {os.getcwd()} | py: {sys.version.split()[0]}"
+ )
+
import os
import sys
from pathlib import Path
@@ -24,7 +30,8 @@ import io
from pathlib import Path
import time
import gc
- from io import StringIO
+ import hashlib
+ import logging

# Import local modules
from models.figure2_cnn import Figure2CNN
@@ -43,6 +50,15 @@ st.set_page_config(
    initial_sidebar_state="expanded"
)

+ # Stabilize tab panel height on HF Spaces to prevent visible column jitter.
+ # This sets a minimum height for the content area under the tab headers.
+ st.markdown("""
+ <style>
+ /* Tabs content area: the sibling after the tablist */
+ div[data-testid="stTabs"] > div[role="tablist"] + div { min-height: 420px; }
+ </style>
+ """, unsafe_allow_html=True)
+
# Constants
TARGET_LEN = 500
SAMPLE_DATA_DIR = "sample_data"
@@ -87,6 +103,15 @@ def label_file(filename: str) -> int:
    # Return None for unknown patterns instead of raising error
    return -1  # Default value for unknown patterns

+ @st.cache_data
+ def load_state_dict(_mtime, model_path):
+     """Load state dict with mtime in cache key to detect file changes"""
+     try:
+         return torch.load(model_path, map_location="cpu")
+     except Exception as e:
+         st.warning(f"Error loading state dict: {e}")
+         return None
+
@st.cache_resource
def load_model(model_name):
    """Load and cache the specified model with error handling"""
@@ -104,15 +129,17 @@ def load_model(model_name):
            st.info("Using randomly initialized model for demonstration purposes.")
            return model, False

+         # Get mtime for cache invalidation
+         mtime = os.path.getmtime(model_path)
+
        # Load weights
-         state_dict = torch.load(model_path, map_location="cpu")
-         model.load_state_dict(state_dict, strict=False)
-         if model is not None:
+         state_dict = load_state_dict(mtime, model_path)
+         if state_dict:
+             model.load_state_dict(state_dict, strict=True)
            model.eval()
+             return model, True
        else:
-             raise ValueError("Model is not loaded. Please check the model configuration or weights.")
-
-         return model, True
+             return model, False

    except Exception as e:
        st.error(f"❌ Error loading model {model_name}: {str(e)}")
@@ -133,7 +160,7 @@ def get_sample_files():
    return []

def parse_spectrum_data(raw_text):
-     """Parse spectrum data from text with robust error handling"""
+     """Parse spectrum data from text with robust error handling and validation"""
    x_vals, y_vals = [], []

    for line in raw_text.splitlines():
@@ -158,7 +185,22 @@ def parse_spectrum_data(raw_text):
    if len(x_vals) < 10:  # Minimum reasonable spectrum length
        raise ValueError(f"Insufficient data points: {len(x_vals)}. Need at least 10 points.")

-     return np.array(x_vals), np.array(y_vals)
+     x = np.array(x_vals)
+     y = np.array(y_vals)
+
+     # Check for NaNs
+     if np.any(np.isnan(x)) or np.any(np.isnan(y)):
+         raise ValueError("Input data contains NaN values")
+
+     # Check monotonic increasing x
+     if not np.all(np.diff(x) > 0):
+         raise ValueError("Wavenumbers must be strictly increasing")
+
+     # Check reasonable range for Raman spectroscopy
+     if min(x) < 0 or max(x) > 10000 or (max(x) - min(x)) < 100:
+         raise ValueError(f"Invalid wavenumber range: {min(x)} - {max(x)}. Expected ~400-4000 cm⁻¹ with span >100")
+
+     return x, y

def create_spectrum_plot(x_raw, y_raw, y_resampled):
    """Create spectrum visualization plot"""
@@ -202,29 +244,73 @@ def get_confidence_description(logit_margin):
    else:
        return "LOW", "🔴"

- # Initialize session state
def init_session_state():
-     """Initialize session state variables"""
    defaults = {
-         'status_message': "Ready to analyze polymer spectra 🔬",
-         'status_type': "info",
-         'uploaded_file': None,  # legacy; kept for compatibility
-         'input_text': None,  # ←←← NEW: canonical store for spectrum text
-         'filename': None,
-         'inference_run_once': False,
-         'x_raw': None,
-         'y_raw': None,
-         'y_resampled': None
+         "status_message": "Ready to analyze polymer spectra 🔬",
+         "status_type": "info",
+         "input_text": None,
+         "filename": None,
+         "input_source": None,  # "upload" or "sample"
+         "sample_select": "-- Select Sample --",
+         "input_mode": "Upload File",  # controls which pane is visible
+         "inference_run_once": False,
+         "x_raw": None, "y_raw": None, "y_resampled": None,
    }
+     for k, v in defaults.items():
+         st.session_state.setdefault(k, v)
+

    for key, default_value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default_value

+ def log_message(msg):
+     """Log message for observability"""
+     st.session_state.setdefault('log_messages', []).append(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {msg}")
+
+ def trigger_run():
+     """Set a flag so we can detect button press reliably across reruns"""
+     st.session_state['run_requested'] = True
+
+ def on_upload_change():
+     """Read uploaded file once and persist as text."""
+     up = st.session_state.get("upload_txt")  # the uploader's key
+     if not up:
+         return
+     raw = up.read()
+     text = raw.decode("utf-8") if isinstance(raw, bytes) else raw
+     st.session_state["input_text"] = text
+     st.session_state["filename"] = getattr(up, "name", "uploaded.txt")
+     st.session_state["input_source"] = "upload"
+     st.session_state["status_message"] = f"📁 File '{st.session_state['filename']}' ready for analysis"
+     st.session_state["status_type"] = "success"
+
+ def on_sample_change():
+     """Read selected sample once and persist as text."""
+     sel = st.session_state.get("sample_select", "-- Select Sample --")
+     if sel == "-- Select Sample --":
+         # Do nothing; leave current input intact (prevents clobbering uploads)
+         return
+     try:
+         text = (Path(SAMPLE_DATA_DIR) / sel).read_text(encoding="utf-8")
+         st.session_state["input_text"] = text
+         st.session_state["filename"] = sel
+         st.session_state["input_source"] = "sample"
+         st.session_state["status_message"] = f"📁 Sample '{sel}' ready for analysis"
+         st.session_state["status_type"] = "success"
+     except Exception as e:
+         st.session_state["status_message"] = f"❌ Error loading sample: {e}"
+         st.session_state["status_type"] = "error"
+
+ def on_input_mode_change():
+     if st.session_state["input_mode"] == "Upload File":
+         # reset sample when switching to Upload
+         st.session_state["sample_select"] = "-- Select Sample --"
+
+
# Main app
def main():
    init_session_state()
-
    # Header
    st.title("🔬 AI-Driven Polymer Classification")
    st.markdown("**Predict polymer degradation states using Raman spectroscopy and deep learning**")
@@ -273,107 +359,97 @@ def main():
    with col1:
        st.subheader("📁 Data Input")

-         # File upload tabs
-         tab1, tab2 = st.tabs(["📤 Upload File", "🧪 Sample Data"])
-
-         uploaded_file = None
-
-         with tab1:
-             uploaded_file = st.file_uploader(
-                 "Upload Raman spectrum (.txt)",
+         mode = st.radio(
+             "Input mode",
+             ["Upload File", "Sample Data"],
+             key="input_mode",
+             horizontal=True,
+             on_change=on_input_mode_change
+         )
+
+         # ---- Upload tab ----
+         if mode == "Upload File":
+             up = st.file_uploader(
+                 "Upload Raman spectrum (.txt)",
                type="txt",
                help="Upload a text file with wavenumber and intensity columns",
-                 key="upload_text"
+                 key="upload_txt",
+                 on_change=on_upload_change,  # <-- critical
            )
+             if up:
+                 st.success(f"✅ Loaded: {up.name}")

-         if uploaded_file:
-             # Read now and persist raw text; avoid holding open buffers in session_state
-             raw = uploaded_file.read()
-             text = raw.decode("utf-8") if isinstance(raw, bytes) else raw
-             st.session_state['input_text'] = text
-             st.session_state['filename'] = uploaded_file.name
-             st.session_state['uploaded_file'] = None  # avoid stale buffers
-             st.success(f"✅ Loaded: {uploaded_file.name}")
-
-         with tab2:
+         # ---- Sample tab ----
+         else:
            sample_files = get_sample_files()
            if sample_files:
-                 sample_options = ["-- Select Sample --"] + [f.name for f in sample_files]
-                 selected_sample = st.selectbox("Choose sample spectrum:", sample_options, key="sample_select")
-
-                 if selected_sample != "-- Select Sample --":
-                     selected_path = Path(SAMPLE_DATA_DIR) / selected_sample
-                     try:
-                         with open(selected_path, "r", encoding="utf-8") as f:
-                             file_contents = f.read()
-                         # Persist raw text + name; no open file handles in session_state
-                         st.session_state['input_text'] = file_contents
-                         st.session_state['filename'] = selected_sample
-                         st.session_state['uploaded_file'] = None
-                         st.success(f"✅ Loaded sample: {selected_sample}")
-                     except (FileNotFoundError, IOError) as e:
-                         st.error(f"Error loading sample: {e}")
+                 options = ["-- Select Sample --"] + [p.name for p in sample_files]
+                 sel = st.selectbox(
+                     "Choose sample spectrum:",
+                     options,
+                     key="sample_select",
+                     on_change=on_sample_change,  # <-- critical
+                 )
+                 if sel != "-- Select Sample --":
+                     st.success(f"✅ Loaded sample: {sel}")
            else:
                st.info("No sample data available")

-         # Update session state
-         # If we captured text via either tab, reflect readiness in status
-         if st.session_state.get('input_text'):
-             st.session_state['status_message'] = f"📁 File '{st.session_state.get('filename', '(unnamed)')}' ready for analysis"
-             st.session_state['status_type'] = "success"
-
-         # Status display
+         # ---- Status box ----
        st.subheader("🚦 Status")
-         status_msg = st.session_state.get("status_message", "Ready")
-         status_type = st.session_state.get("status_type", "info")
-
-         if status_type == "success":
-             st.success(status_msg)
-         elif status_type == "error":
-             st.error(status_msg)
+         msg = st.session_state.get("status_message", "Ready")
+         typ = st.session_state.get("status_type", "info")
+         if typ == "success":
+             st.success(msg)
+         elif typ == "error":
+             st.error(msg)
        else:
-             st.info(status_msg)
+             st.info(msg)

-         # Load model
+         # ---- Model load ----
        model, model_loaded = load_model(model_choice)
-
-         # Ready if we have cached text and a model instance
-         inference_ready = bool(st.session_state.get('input_text')) and (model is not None)
-
        if not model_loaded:
            st.warning("⚠️ Model weights not available - using demo mode")

-         if st.button("▶️ Run Analysis", disabled=not inference_ready, type="primary", key="run_btn"):
-             if inference_ready:
-                 try:
-                     # Use persisted text + filename (works for uploads and samples)
-                     raw_text = st.session_state.get('input_text')
-                     filename = st.session_state.get('filename') or "unknown.txt"
-                     if not raw_text:
-                         raise ValueError("No input text available. Please upload or select a sample.")
-
-                     # Parse spectrum
-                     with st.spinner("Parsing spectrum data..."):
-                         x_raw, y_raw = parse_spectrum_data(raw_text)
-
-                     # Resample spectrum
-                     with st.spinner("Resampling spectrum..."):
-                         y_resampled = resample_spectrum(x_raw, y_raw, TARGET_LEN)
-
-                     # Store in session state
-                     st.session_state['x_raw'] = x_raw
-                     st.session_state['y_raw'] = y_raw
-                     st.session_state['y_resampled'] = y_resampled
-                     st.session_state['inference_run_once'] = True
-                     st.session_state['status_message'] = f"🔍 Analysis completed for: {filename}"
-                     st.session_state['status_type'] = "success"
-
-                     st.rerun()
+         # Ready to run if we have text and a model
+         inference_ready = bool(st.session_state.get("input_text")) and (model is not None)
+
+         # ---- Run Analysis (form submit batches state + submit atomically) ----
+         with st.form("analysis_form", clear_on_submit=False):
+             submitted = st.form_submit_button(
+                 "▶️ Run Analysis",
+                 type="primary",
+                 disabled=not inference_ready,
+             )
+
+         if submitted and inference_ready:
+             try:
+                 raw_text = st.session_state["input_text"]
+                 filename = st.session_state.get("filename") or "unknown.txt"
+
+                 # Parse
+                 with st.spinner("Parsing spectrum data..."):
+                     x_raw, y_raw = parse_spectrum_data(raw_text)
+
+                 # Resample
+                 with st.spinner("Resampling spectrum..."):
+                     y_resampled = resample_spectrum(x_raw, y_raw, TARGET_LEN)
+
+                 # Persist results (drives right column)
+                 st.session_state["x_raw"] = x_raw
+                 st.session_state["y_raw"] = y_raw
+                 st.session_state["y_resampled"] = y_resampled
+                 st.session_state["inference_run_once"] = True
+                 st.session_state["status_message"] = f"🔍 Analysis completed for: {filename}"
+                 st.session_state["status_type"] = "success"
+
+                 st.rerun()
+
+             except Exception as e:
+                 st.error(f"❌ Analysis failed: {e}")
+                 st.session_state["status_message"] = f"❌ Error: {e}"
+                 st.session_state["status_type"] = "error"

-             except Exception as e:
-                 st.error(f"❌ Analysis failed: {str(e)}")
-                 st.session_state['status_message'] = f"❌ Error: {str(e)}"
-                 st.session_state['status_type'] = "error"

    # Results column
    with col2:
@@ -394,6 +470,7 @@ def main():
                st.image(spectrum_plot, caption="Spectrum Preprocessing Results", use_container_width=True)
            except Exception as e:
                st.warning(f"Could not generate plot: {e}")
+                 log_message(f"Plot generation error: {e}")

            # Run inference
            try:
@@ -413,6 +490,7 @@ def main():
                logits_list = logits.detach().numpy().tolist()[0]

                inference_time = time.time() - start_time
+                 log_message(f"Inference completed in {inference_time:.2f}s, prediction: {prediction}")

                # Clean up memory
                cleanup_memory()
@@ -476,8 +554,14 @@ def main():

                with tab2:
                    st.markdown("**Technical Information**")
+                     model_path = MODEL_CONFIG[model_choice]["path"]
+                     mtime = os.path.getmtime(model_path) if os.path.exists(model_path) else "N/A"
+                     file_hash = hashlib.md5(open(model_path, 'rb').read()).hexdigest() if os.path.exists(model_path) else "N/A"
                    st.json({
                        "Model Architecture": model_choice,
+                         "Model Path": model_path,
+                         "Weights Last Modified": time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(mtime)) if mtime != "N/A" else "N/A",
+                         "Weights Hash": file_hash,
                        "Input Shape": list(input_tensor.shape),
                        "Output Shape": list(logits.shape),
                        "Inference Time": f"{inference_time:.3f}s",
@@ -488,6 +572,10 @@ def main():
                    if not model_loaded:
                        st.warning("⚠️ Demo mode: Using randomly initialized weights")

+                     # Debug log
+                     st.markdown("**Debug Log**")
+                     st.text_area("Logs", "\n".join(st.session_state['log_messages']), height=200)
+
                with tab3:
                    st.markdown("""
                    **🔍 Analysis Process**
@@ -513,6 +601,7 @@ def main():

            except Exception as e:
                st.error(f"❌ Inference failed: {str(e)}")
+                 log_message(f"Inference error: {str(e)}")

        else:
            st.error("❌ Missing spectrum data. Please upload a file and run analysis.")
@@ -539,4 +628,4 @@ def main():
    """)

# Run the application
- main()
+ main()