"""Streamlit app: classify polymer Raman spectra as stable or weathered.

The page has a sidebar (model picker + about box), a left column for data
input and the "Run Analysis" / "Reset" controls, and a right column that shows
the preprocessing plot and the model prediction once an analysis has run.
"""
from models.resnet_cnn import ResNet1D
from models.figure2_cnn import Figure2CNN
import hashlib
import gc
import time
import io
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import torch
import torch.nn.functional as F
import streamlit as st
import os
import sys
from pathlib import Path

# Ensure 'utils' directory is in the Python path
utils_path = Path(__file__).resolve().parent / "utils"
if utils_path.is_dir() and str(utils_path) not in sys.path:
    sys.path.append(str(utils_path))

matplotlib.use("Agg")  # ensure headless rendering in Spaces

# Import local modules
from utils.preprocessing import resample_spectrum

KEEP_KEYS = {
    # === global UI context we want to keep after "Reset" ===
    "model_select",      # sidebar model key
    "input_mode",        # radio for Upload|Sample
    "uploader_version",  # version counter for file uploader
    # NOTE(review): nothing in this file reads or writes "input_registry";
    # confirm it is still needed.
    "input_registry",
}

# Configuration
st.set_page_config(
    page_title="ML Polymer Classification",
    page_icon="🔬",
    layout="wide",
    initial_sidebar_state="expanded"
)

# NOTE(review): this HTML block is empty in this copy of the file; it looks
# like a stylesheet used to live here -- confirm against the original.
st.markdown(""" """, unsafe_allow_html=True)

# Constants
TARGET_LEN = 500
SAMPLE_DATA_DIR = Path("sample_data")
SAMPLE_PLACEHOLDER = "-- Select Sample --"
DEFAULT_STATUS_MESSAGE = "Ready to analyze polymer spectra 🔬"

# Prefer env var, else 'model_weights' if present; else canonical 'outputs'
MODEL_WEIGHTS_DIR = (
    os.getenv("WEIGHTS_DIR")
    or ("model_weights" if os.path.isdir("model_weights") else "outputs")
)

# Model configuration
MODEL_CONFIG = {
    "Figure2CNN (Baseline)": {
        "class": Figure2CNN,
        "path": f"{MODEL_WEIGHTS_DIR}/figure2_model.pth",
        "emoji": "",
        "description": "Baseline CNN with standard filters",
        "accuracy": "94.80%",
        "f1": "94.30%"
    },
    "ResNet1D (Advanced)": {
        "class": ResNet1D,
        "path": f"{MODEL_WEIGHTS_DIR}/resnet_model.pth",
        "emoji": "",
        "description": "Residual CNN with deeper feature learning",
        "accuracy": "96.20%",
        "f1": "95.90%"
    }
}

# Label mapping
LABEL_MAP = {0: "Stable (Unweathered)", 1: "Weathered (Degraded)"}


# === UTILITY FUNCTIONS ===
def init_session_state():
    """Seed every session-state key the app reads, without overwriting existing values."""
    defaults = {
        "status_message": DEFAULT_STATUS_MESSAGE,
        "status_type": "info",
        "input_text": None,
        "filename": None,
        "input_source": None,  # "upload" or "sample"
        "sample_select": SAMPLE_PLACEHOLDER,
        "input_mode": "Upload File",  # controls which pane is visible
        "inference_run_once": False,
        "x_raw": None,
        "y_raw": None,
        "x_resampled": None,
        "y_resampled": None,
        "log_messages": [],
        "uploader_version": 0,
        "current_upload_key": "upload_txt_0",
    }
    for key, default_value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default_value


def label_file(filename: str) -> int:
    """Extract the ground-truth label from a filename.

    Returns:
        0 when the file name starts with "sta", 1 when it starts with "wea"
        (both case-insensitive), and -1 for any other name. -1 is not a key
        of LABEL_MAP, so callers can detect "unknown" by a failed lookup.
    """
    name = Path(filename).name.lower()
    if name.startswith("sta"):
        return 0
    if name.startswith("wea"):
        return 1
    return -1


@st.cache_data
def load_state_dict(mtime, model_path):
    """Load a state dict from ``model_path``; returns None if loading fails.

    ``mtime`` is not used in the body: it exists only so that a changed
    weights file produces a different cache key. It must NOT start with an
    underscore, because Streamlit excludes underscore-prefixed parameters
    from the cache key.
    """
    try:
        return torch.load(model_path, map_location="cpu", weights_only=True)
    except (FileNotFoundError, RuntimeError) as e:
        st.warning(f"Error loading state dict: {e}")
        return None


@st.cache_resource
def load_model(model_name):
    """Build the model named ``model_name`` and load its weights if available.

    Returns:
        ``(model, True)`` when weights were loaded, ``(model, False)`` when the
        model is randomly initialised (weights missing or unreadable), and
        ``(None, False)`` when the model could not be set up at all.
    """
    try:
        config = MODEL_CONFIG[model_name]
        model_path = config["path"]

        # Initialize model
        model = config["class"](input_length=TARGET_LEN)

        if not os.path.exists(model_path):
            st.warning(f"⚠️ Model weights not found: {model_path}")
            st.info("Using randomly initialized model for demonstration purposes.")
            return model, False

        # mtime is part of the load_state_dict cache key
        state_dict = load_state_dict(os.path.getmtime(model_path), model_path)
        if not state_dict:
            return model, False

        model.load_state_dict(state_dict, strict=True)
        model.eval()
        return model, True
    except (FileNotFoundError, KeyError, RuntimeError) as e:
        # RuntimeError: strict load_state_dict found mismatched keys/shapes
        st.error(f"❌ Error loading model {model_name}: {str(e)}")
        return None, False


def cleanup_memory():
    """Clean up memory after inference"""
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


@st.cache_data
def get_sample_files():
    """Return the sorted ``*.txt`` files in SAMPLE_DATA_DIR, or [] if it does not exist."""
    sample_dir = Path(SAMPLE_DATA_DIR)
    if sample_dir.exists():
        return sorted(sample_dir.glob("*.txt"))
    return []


def _parse_float(token):
    """Return ``token`` as a finite float, or None if it is not one."""
    try:
        value = float(token)
    except ValueError:
        return None
    return value if np.isfinite(value) else None


def parse_spectrum_data(raw_text):
    """Parse two-column spectrum text into ``(x, y)`` numpy arrays.

    Blank lines and lines starting with ``#`` are skipped. Commas are treated
    as separators. On each remaining line the first two tokens that parse as
    finite floats (plain or scientific notation) become x and y; lines with
    fewer than two such tokens are ignored.

    Raises:
        ValueError: fewer than 10 points, x not strictly increasing, or x
            outside 0..10000 or spanning less than 100.
    """
    x_vals, y_vals = [], []
    for line in raw_text.splitlines():
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        tokens = line.replace(",", " ").split()
        numbers = [v for v in map(_parse_float, tokens) if v is not None]
        if len(numbers) >= 2:
            x_vals.append(numbers[0])
            y_vals.append(numbers[1])

    if len(x_vals) < 10:  # Minimum reasonable spectrum length
        raise ValueError(
            f"Insufficient data points: {len(x_vals)}. Need at least 10 points.")

    x = np.array(x_vals)
    y = np.array(y_vals)

    if not np.all(np.diff(x) > 0):
        raise ValueError("Wavenumbers must be strictly increasing")

    # Check reasonable range for Raman spectroscopy
    x_min, x_max = x.min(), x.max()
    if x_min < 0 or x_max > 10000 or (x_max - x_min) < 100:
        raise ValueError(
            f"Invalid wavenumber range: {x_min} - {x_max}. "
            "Expected ~400-4000 cm⁻¹ with span >100")

    return x, y


def create_spectrum_plot(x_raw, y_raw, x_resampled, y_resampled):
    """Render raw and resampled spectra side by side and return a PIL image."""
    fig, ax = plt.subplots(1, 2, figsize=(13, 5), dpi=100)
    try:
        # == Raw spectrum ==
        ax[0].plot(x_raw, y_raw, label="Raw", color="dimgray", linewidth=1)
        ax[0].set_title("Raw Input Spectrum")

        # == Resampled spectrum ==
        ax[1].plot(x_resampled, y_resampled, label="Resampled",
                   color="steelblue", linewidth=1)
        ax[1].set_title(f"Resampled ({len(y_resampled)} points)")

        for axis in ax:
            axis.set_xlabel("Wavenumber (cm⁻¹)")
            axis.set_ylabel("Intensity")
            axis.grid(True, alpha=0.3)
            axis.legend()

        fig.tight_layout()

        # == Convert to image ==
        buf = io.BytesIO()
        fig.savefig(buf, format='png', bbox_inches='tight', dpi=100)
        buf.seek(0)
    finally:
        plt.close(fig)  # release the figure even if plotting/saving failed
    return Image.open(buf)


def _pct(p: float) -> str:
    """Format a 0..1 fraction as a fixed-width percent, e.g. " 98.7%"."""
    return f"{float(p)*100:5.1f}%"


def render_confidence_progress(
    probs: np.ndarray,
    labels: list[str] | None = None,
    highlight_idx: int | None = None,
    side_by_side: bool = True
):
    """Render one Streamlit progress bar (0 - 100) per class probability.

    Args:
        probs: Class probabilities; values are clipped to [0, 1].
        labels: One label per bar. Defaults to ["Stable", "Weathered"].
        highlight_idx: Index of the bar whose title is rendered in bold.
        side_by_side: Place bars in columns when True, stacked when False.
    """
    if labels is None:
        labels = ["Stable", "Weathered"]
    clipped = np.clip(np.asarray(probs, dtype=float), 0.0, 1.0)

    def _draw(i: int, lbl: str, val: float):
        title = f"{lbl} - {val*100:.1f}%"
        if highlight_idx is not None and i == highlight_idx:
            title = f"**{title}**"
        st.markdown(title)
        st.progress(int(round(val * 100)))

    if side_by_side:
        cols = st.columns(len(labels))
        for i, (lbl, val, col) in enumerate(zip(labels, clipped, cols)):
            with col:
                _draw(i, lbl, float(val))
    else:
        for i, (lbl, val) in enumerate(zip(labels, clipped)):
            _draw(i, lbl, float(val))


def render_kv_grid(d: dict, ncols: int = 2):
    """Display dict as a grid of key/value rows spread over ``ncols`` columns."""
    if not d:
        return
    import html  # local import: only needed here

    cols = st.columns(ncols)
    for i, (key, value) in enumerate(d.items()):
        with cols[i % ncols]:
            # NOTE(review): the original HTML markup of this row is missing
            # from this copy of the file; this is a minimal reconstruction.
            # Keys/values are escaped because the string is rendered as HTML.
            st.markdown(
                f"<div><strong>{html.escape(str(key))}</strong>: "
                f"{html.escape(str(value))}</div>",
                unsafe_allow_html=True
            )


def render_model_meta(model_choice: str):
    """Show accuracy, F1 and description for ``model_choice`` from MODEL_CONFIG."""
    info = MODEL_CONFIG.get(model_choice, {})
    emoji = info.get("emoji", "")
    desc = info.get("description", "").strip()

    st.caption(f"{emoji} **Model Snapshot** - {model_choice}")
    cols = st.columns(2)
    with cols[0]:
        st.metric("Accuracy", info.get("accuracy", "-"))
    with cols[1]:
        st.metric("F1 Score", info.get("f1", "-"))
    if desc:
        st.caption(desc)


def get_confidence_description(logit_margin):
    """Map a logit margin to a ``(description, emoji)`` confidence bucket."""
    if logit_margin > 1000:
        return "VERY HIGH", "🟢"
    if logit_margin > 250:
        return "HIGH", "🟡"
    if logit_margin > 100:
        return "MODERATE", "🟠"
    return "LOW", "🔴"


def log_message(msg: str):
    """Append a timestamped line to the in-app log, creating the buffer if needed."""
    if st.session_state.get("log_messages") is None:
        st.session_state["log_messages"] = []
    st.session_state["log_messages"].append(
        f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {msg}"
    )


def trigger_run():
    """Set a flag so we can detect button press reliably across reruns"""
    st.session_state['run_requested'] = True


def on_sample_change():
    """Read the selected sample file once and persist its text in session state."""
    sel = st.session_state.get("sample_select", SAMPLE_PLACEHOLDER)
    if sel == SAMPLE_PLACEHOLDER:
        return
    try:
        text = (SAMPLE_DATA_DIR / sel).read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        st.session_state["status_message"] = f"❌ Error loading sample: {e}"
        st.session_state["status_type"] = "error"
        return

    st.session_state["input_text"] = text
    st.session_state["filename"] = sel
    st.session_state["input_source"] = "sample"
    # Clear previous results so right column resets immediately
    reset_results("New sample selected")
    st.session_state["status_message"] = f"📁 Sample '{sel}' ready for analysis"
    st.session_state["status_type"] = "success"


def on_input_mode_change():
    """Reset the sample picker when switching to Upload, and clear old results."""
    if st.session_state["input_mode"] == "Upload File":
        st.session_state["sample_select"] = SAMPLE_PLACEHOLDER
    # Reset when switching modes to prevent stale right-column visuals
    reset_results("Switched input mode")


def on_model_change():
    """Force the right column back to init state when the model changes"""
    reset_results("Model changed")


def reset_results(reason: str = ""):
    """Clear previous inference artifacts so the right column returns to initial state.

    Args:
        reason: Shown in the status box; the default status is used when empty.
    """
    st.session_state["inference_run_once"] = False
    for key in ("x_raw", "y_raw", "x_resampled", "y_resampled"):
        st.session_state[key] = None
    # ||== Clear logs between runs ==||
    st.session_state["log_messages"] = []
    # ||== Always reset the status box ==||
    st.session_state["status_message"] = (
        f"ℹ️ {reason}" if reason else DEFAULT_STATUS_MESSAGE
    )
    st.session_state["status_type"] = "info"


def reset_ephemeral_state():
    """Remove everything except the KEEP_KEYS UI context, reseed, and rerun."""
    for k in list(st.session_state.keys()):
        if k not in KEEP_KEYS:
            st.session_state.pop(k, None)

    # == bump the uploader version → new widget instance with empty value ==
    version = st.session_state.get("uploader_version", 0) + 1
    st.session_state["uploader_version"] = version
    st.session_state["current_upload_key"] = f"upload_txt_{version}"

    # == reseed other ephemeral state ==
    st.session_state["input_text"] = None
    st.session_state["filename"] = None
    st.session_state["input_source"] = None
    st.session_state["sample_select"] = SAMPLE_PLACEHOLDER

    # == return the UI to a clean state ==
    reset_results()
    st.rerun()


def _is_strictly_increasing(a) -> bool:
    """True when ``a`` is 1-D, has at least two elements, and strictly increases."""
    a = np.asarray(a)
    return bool(a.ndim == 1 and a.size >= 2 and np.all(np.diff(a) > 0))


def _file_md5(path) -> str:
    """Return the md5 hex digest of the file at ``path``, read in chunks."""
    digest = hashlib.md5()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _render_sidebar() -> str:
    """Draw the sidebar and return the selected MODEL_CONFIG key."""
    with st.sidebar:
        st.header("AI-Driven Polymer Classification")
        st.caption(
            "Predict polymer degradation (Stable vs Weathered) from Raman spectra "
            "using validated CNN models. — v0.1")

        # Display label -> config key, so the key never has to be re-parsed
        # out of the label text.
        label_to_name = {
            f"{cfg['emoji']} {name}": name for name, cfg in MODEL_CONFIG.items()
        }
        selected_label = st.selectbox(
            "Choose AI Model", list(label_to_name),
            key="model_select", on_change=on_model_change)
        model_choice = label_to_name[selected_label]

        # ===Compact metadata directly under dropdown===
        render_model_meta(model_choice)

        # ===Collapsed info to reduce clutter===
        with st.expander("About This App", icon=":material/info:", expanded=False):
            st.markdown("""
            AI-Driven Polymer Aging Prediction and Classification

            **Purpose**: Classify polymer degradation using AI
            **Input**: Raman spectroscopy `.txt` files
            **Models**: CNN architectures for binary classification
            **Next**: More trained CNNs in evaluation pipeline

            ---

            **Contributors**
            Dr. Sanmukh Kuppannagari (Mentor)
            Dr. Metin Karailyan (Mentor)
            👨‍💻 Jaser Hasan (Author)

            ---

            **Links**
            🔗 [Live HF Space](https://huggingface.co/spaces/dev-jas/polymer-aging-ml)
            📂 [GitHub Repository](https://github.com/KLab-AI3/ml-polymer-recycling)

            ---

            **Citation Figure2CNN (baseline)**
            Neo et al., 2023, *Resour. Conserv. Recycl.*, 188, 106718.
            [https://doi.org/10.1016/j.resconrec.2022.106718](https://doi.org/10.1016/j.resconrec.2022.106718)
            """)
    return model_choice


def _render_input_panel():
    """Draw the upload/sample picker and the status box (left column)."""
    st.markdown("##### Data Input")
    mode = st.radio(
        "Input mode",
        ["Upload File", "Sample Data"],
        key="input_mode",
        horizontal=True,
        on_change=on_input_mode_change
    )

    # ---- Upload tab ----
    if mode == "Upload File":
        up = st.file_uploader(
            "Upload Raman spectrum (.txt)",
            type="txt",
            help="Upload a text file with wavenumber and intensity columns",
            key=st.session_state["current_upload_key"],  # versioned key
        )
        # == process change immediately (no on_change; simpler & reliable) ==
        if up is not None:
            raw = up.read()
            try:
                text = raw.decode("utf-8") if isinstance(raw, bytes) else raw
            except UnicodeDecodeError as e:
                text = None
                st.session_state["status_message"] = (
                    f"❌ Error: file is not valid UTF-8 text ({e})")
                st.session_state["status_type"] = "error"

            # == only reparse if it's a different file|source ==
            is_new = (
                st.session_state.get("filename") != getattr(up, "name", None)
                or st.session_state.get("input_source") != "upload"
            )
            if text is not None and is_new:
                st.session_state["input_text"] = text
                st.session_state["filename"] = getattr(up, "name", "uploaded.txt")
                st.session_state["input_source"] = "upload"
                # == clear right column immediately ==
                reset_results("New file selected")
                st.session_state["status_message"] = (
                    f"📁 File '{st.session_state['filename']}' ready for analysis")
                st.session_state["status_type"] = "success"

    # ---- Sample tab ----
    else:
        sample_files = get_sample_files()
        if sample_files:
            options = [SAMPLE_PLACEHOLDER] + [p.name for p in sample_files]
            sel = st.selectbox(
                "Choose sample spectrum:",
                options,
                key="sample_select",
                on_change=on_sample_change,  # loads the file into session state
            )
            if sel != SAMPLE_PLACEHOLDER:
                st.markdown(f"✅ Loaded sample: {sel}")
        else:
            st.info("No sample data available")

    # ---- Status box ----
    msg = st.session_state.get("status_message", "Ready")
    typ = st.session_state.get("status_type", "info")
    {"success": st.success, "error": st.error}.get(typ, st.info)(msg)


def _run_analysis():
    """Parse and resample the current input, persist the arrays, then rerun."""
    try:
        raw_text = st.session_state["input_text"]
        filename = st.session_state.get("filename") or "unknown.txt"

        with st.spinner("Parsing spectrum data..."):
            x_raw, y_raw = parse_spectrum_data(raw_text)

        with st.spinner("Resampling spectrum..."):
            r1, r2 = resample_spectrum(x_raw, y_raw, TARGET_LEN)
            # The order of the two outputs is detected rather than assumed:
            # whichever array is strictly increasing is taken as the x axis.
            r1_inc = _is_strictly_increasing(r1)
            r2_inc = _is_strictly_increasing(r2)
            if r2_inc and not r1_inc:
                x_resampled, y_resampled = np.asarray(r2), np.asarray(r1)
            else:
                x_resampled, y_resampled = np.asarray(r1), np.asarray(r2)
                if r1_inc == r2_inc:
                    # == Ambiguous; assume (x, y) and log ==
                    log_message("Resample outputs ambigous; assumed (x, y).")

        # Persist results (drives right column)
        st.session_state["x_raw"] = x_raw
        st.session_state["y_raw"] = y_raw
        st.session_state["x_resampled"] = x_resampled
        st.session_state["y_resampled"] = y_resampled
        st.session_state["inference_run_once"] = True
        st.session_state["status_message"] = f"🔍 Analysis completed for: {filename}"
        st.session_state["status_type"] = "success"
        st.rerun()
    except (ValueError, TypeError) as e:
        st.error(f"❌ Analysis failed: {e}")
        st.session_state["status_message"] = f"❌ Error: {e}"
        st.session_state["status_type"] = "error"


def _render_getting_started():
    """Show the onboarding text used before any analysis has run."""
    st.markdown("""
    ##### Get started by:

    1. Select an AI model in the sidebar
    2. Upload a Raman spectrum file or choose a sample
    3. Click "Run Analysis" to get predictions

    ##### Supported formats:

    - Text files (.txt) with wavenumber and intensity columns
    - Space or comma-separated values
    - Any length (automatically resampled to 500 points)

    ##### Example applications:

    - 🔬 Research on polymer degradation
    - ♻️ Recycling feasibility assessment
    - 🌱 Sustainability impact studies
    - 🏭 Quality control in manufacturing
    """)


def _render_results(model, model_choice, model_loaded):
    """Draw the results column: plot, inference, and the three detail tabs."""
    if not st.session_state.get("inference_run_once", False):
        _render_getting_started()
        return

    st.markdown("##### Analysis Results")

    x_raw = st.session_state.get('x_raw')
    y_raw = st.session_state.get('y_raw')
    x_resampled = st.session_state.get('x_resampled')
    y_resampled = st.session_state.get('y_resampled')
    # The key exists with value None after init, so .get()'s default is not enough.
    filename = st.session_state.get('filename') or 'Unknown'

    if any(v is None for v in (x_raw, y_raw, x_resampled, y_resampled)):
        st.error("❌ Missing spectrum data. Please upload a file and run analysis.")
        return

    # Create and display plot
    try:
        spectrum_plot = create_spectrum_plot(x_raw, y_raw, x_resampled, y_resampled)
        st.image(
            spectrum_plot,
            caption="Spectrum Preprocessing Results",
            use_container_width=True)
    except (ValueError, RuntimeError, TypeError) as e:
        st.warning(f"Could not generate plot: {e}")
        log_message(f"Plot generation error: {e}")

    # Run inference
    try:
        with st.spinner("Running AI inference..."):
            # Guard before touching the model so a missing model is reported
            # through the handler below instead of an AttributeError.
            if model is None:
                raise ValueError(
                    "Model is not loaded. Please check the model configuration or weights.")

            start_time = time.time()

            # Shape (1, 1, len(y_resampled)): two leading singleton axes added
            input_tensor = torch.tensor(
                y_resampled, dtype=torch.float32).unsqueeze(0).unsqueeze(0)

            model.eval()
            with torch.no_grad():
                logits = model(input_tensor)

            prediction = int(torch.argmax(logits, dim=1).item())
            logits_list = logits.detach().cpu().numpy().tolist()[0]
            probs = F.softmax(logits.detach(), dim=1).cpu().numpy().flatten()

            inference_time = time.time() - start_time
            log_message(
                f"Inference completed in {inference_time:.2f}s, prediction: {prediction}")

            cleanup_memory()

        # Ground truth: label_file returns -1 for unrecognised names, which is
        # not a LABEL_MAP key, so the lookup yields None for "unknown".
        true_label_str = LABEL_MAP.get(label_file(filename))

        predicted_class = LABEL_MAP.get(prediction, f"Class {prediction}")

        # === confidence metrics ===
        logit_margin = abs(
            logits_list[0] - logits_list[1]) if len(logits_list) >= 2 else 0
        confidence_desc, confidence_emoji = get_confidence_description(logit_margin)

        # ===Detailed results tabs===
        tab1, tab2, tab3 = st.tabs(["Details", "Technical", "Explanation"])

        with tab1:
            st.markdown(f"""
            **Sample**: `{filename}`
            **Model**: `{model_choice}`
            **Processing Time**: `{inference_time:.2f}s`
            """)

            # ===Prediction box && Confidence Margin===
            with st.expander("Prediction/Ground Truth & Model Confidence Margin", expanded=False):
                marker = "🟢" if predicted_class == "Stable (Unweathered)" else "🟡"
                st.markdown(f"{marker} **Prediction**: {predicted_class}")
                st.markdown(
                    f"**{confidence_emoji} Confidence**: {confidence_desc} (margin: {logit_margin:.1f})")

                # Ground truth comparison
                if true_label_str is None:
                    st.markdown(
                        "**Ground Truth**: Unknown (filename doesn't follow naming convention)")
                elif predicted_class == true_label_str:
                    st.markdown(
                        f"✅ **Ground Truth**: {true_label_str} - **Correct!**")
                else:
                    st.markdown(
                        f"❌ **Ground Truth**: {true_label_str} - **Incorrect**")

                st.markdown("###### Confidence Overview")
                render_confidence_progress(
                    probs,
                    labels=["Stable", "Weathered"],
                    highlight_idx=prediction,
                    side_by_side=True,  # Set false for stacked
                )

        with tab2:
            with st.expander("Diagnostics/Technical Info (advanced)", expanded=False):
                st.markdown("###### Model Output (Logits)")
                cols = st.columns(2)
                for i, score in enumerate(logits_list):
                    label = LABEL_MAP.get(i, f"Class {i}")
                    cols[i % 2].metric(label, f"{score:.2f}")

                st.markdown("###### Spectrum Statistics")
                spec_stats = {
                    "Original Length": len(x_raw),
                    "Resampled Length": TARGET_LEN,
                    "Wavenumber Range": f"{min(x_raw):.1f}–{max(x_raw):.1f} cm⁻¹",
                    "Intensity Range": f"{min(y_raw):.1f}–{max(y_raw):.1f}",
                    "Confidence Bucket": confidence_desc,
                }
                render_kv_grid(spec_stats, ncols=2)

                st.markdown("---")
                st.markdown("###### Model Statistics")
                model_path = MODEL_CONFIG[model_choice]["path"]
                weights_exist = os.path.exists(model_path)
                mtime = os.path.getmtime(model_path) if weights_exist else None
                file_hash = _file_md5(model_path) if weights_exist else "N/A"
                model_stats = {
                    "Architecture": model_choice,
                    "Model Path": model_path,
                    "Weights Last Modified": (
                        time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(mtime))
                        if mtime else "N/A"
                    ),
                    "Weights Hash (md5)": file_hash,
                    "Input Shape": list(input_tensor.shape),
                    "Output Shape": list(logits.shape),
                    "Inference Time": f"{inference_time:.3f}s",
                    "Device": "CPU",
                    "Model Loaded": model_loaded,
                }
                render_kv_grid(model_stats, ncols=2)

                st.markdown("---")
                st.markdown("###### Debug Log")
                st.text_area(
                    "Logs",
                    "\n".join(st.session_state.get("log_messages", [])),
                    height=110)

        with tab3:
            st.markdown("""
            **🔍 Analysis Process**

            1. **Data Upload**: Raman spectrum file loaded
            2. **Preprocessing**: Data parsed and resampled to 500 points
            3. **AI Inference**: CNN model analyzes spectral patterns
            4. **Classification**: Binary prediction with confidence scores

            **🧠 Model Interpretation**

            The AI model identifies spectral features indicative of:

            - **Stable polymers**: Well-preserved molecular structure
            - **Weathered polymers**: Degraded/oxidized molecular bonds

            **🎯 Applications**

            - Material longevity assessment
            - Recycling viability evaluation
            - Quality control in manufacturing
            - Environmental impact studies
            """)

    except (ValueError, RuntimeError) as e:
        st.error(f"❌ Inference failed: {str(e)}")
        log_message(f"Inference error: {str(e)}")


# Main app
def main():
    """Build the page: sidebar, input/controls column, and results column."""
    init_session_state()

    model_choice = _render_sidebar()

    # Main content area
    col1, col2 = st.columns([1, 1.35], gap="small")

    with col1:
        _render_input_panel()

        # ---- Model load ----
        model, model_loaded = load_model(model_choice)
        if not model_loaded:
            st.warning("⚠️ Model weights not available - using demo mode")

        # Ready to run if we have text and a model
        inference_ready = bool(
            st.session_state.get("input_text")) and (model is not None)

        # === Run Analysis (form submit batches state) ===
        with st.form("analysis_form", clear_on_submit=False):
            submitted = st.form_submit_button(
                "Run Analysis",
                type="primary",
                disabled=not inference_ready,
            )

        if st.button("Reset", help="Clear current file(s), plots, and results"):
            reset_ephemeral_state()

        if submitted and inference_ready:
            _run_analysis()

    # Results column
    with col2:
        _render_results(model, model_choice, model_loaded)


# Run the application
main()