devjas1 committed
Commit e484a46 · 0 Parent(s)

Initial migration from original polymer_project
.gitattributes ADDED
@@ -0,0 +1 @@
+ outputs/resnet_model.pth filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,46 @@
+ # Ignore raw data and system clutter
+
+ data
+ datasets/
+ depracated_scripts/
+ __pycache__/
+ *.pyc
+ .DS_Store
+ *.zip
+ *.h5
+ *.log
+ *.env
+ *.yml
+ *.json
+ environment.yml
+ test_env/
+ _frozen_reference.txt
+
+ _venv_docker_test/
+
+ .streamlit
+ logs/
+ docs/scope_maintenance_log.yaml
+ depracated_script/ftir_cv_diagnostics_run1.json
+ depracated_script/ftir_cv_diagnostics.json
+ depracated_script/ftir_model.pth
+ depracated_script/plot_ftir_sample.py
+ depracated_script/preprocess_ftir_legacy.py
+ depracated_script/preprocess_ftir.py
+ depracated_script/train_ftir_model_cv.py
+ depracated_script/train_ftir_model.py
+ depracated_script/train_model.py
+ depracated_script/cnn_model.py
+ models/cnn_model.py
+ outputs/inference/test_prediction.json
+ outputs/figure2_model.pth
+ outputs/resnet_model.pth
+ outputs/saliency
+ outputs/plots/04_raman_diagnostics.ipynb
+ outputs/figure2_model.pth
+ outputs/inference/test_prediction.json
+ docs/PROJECT_REPORT.md
+ wea-*.txt
+ sta-*.txt
+ scripts/generate_saliency.py
+ scripts/compare_samples.py
LICENSE ADDED
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,175 @@
+
+ # 🔬 AI-Driven Polymer Aging Prediction and Classification System
+
+ [![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+
+ A research project developed as part of AIRE 2025. This system applies deep learning to Raman spectral data to classify polymer aging — a critical proxy for recyclability — using a fully reproducible and modular ML pipeline.
+
+ ---
+
+ ## 🎯 Project Objective
+
+ - Build a validated machine learning system for classifying polymer spectra (predicting degradation levels as a proxy for recyclability)
+ - Compare literature-based and modern CNN architectures (Figure2CNN vs. ResNet1D) on Raman spectral data
+ - Ensure scientific reproducibility through structured diagnostics and artifact control
+ - Support sustainability and circular materials research through spectrum-based classification
+
+ ---
+
+ ## 🧠 Model Architectures
+
+ | Model | Description |
+ |-------|-------------|
+ | `Figure2CNN` | Baseline model from literature |
+ | `ResNet1D` | Deeper candidate model with skip connections |
+
+ > Both models support flexible input lengths; Figure2CNN relies on reshape logic, while ResNet1D uses native global pooling.
+
+ ---
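The repository's actual `ResNet1D` implementation is not part of this README, but the note above rests on two mechanisms worth seeing concretely: an identity skip connection (which preserves signal length) and global average pooling (which collapses any length to a fixed-size feature). The following is a minimal single-channel NumPy sketch of those two ideas — function names and kernel sizes are illustrative, not the repository's.

```python
import numpy as np

def conv1d_same(x, w):
    # Single-channel 1-D convolution with 'same' padding: output length == input length
    return np.convolve(x, w, mode="same")

def residual_block(x, w1, w2):
    # out = ReLU(conv(ReLU(conv(x))) + x); the identity shortcut preserves length
    h = np.maximum(conv1d_same(x, w1), 0.0)
    h = conv1d_same(h, w2)
    return np.maximum(h + x, 0.0)

def global_average_pool(x):
    # Collapses any spectrum length to one feature, so no reshape logic is needed
    return np.array([x.mean()])

rng = np.random.default_rng(0)
w1, w2 = rng.standard_normal(9), rng.standard_normal(9)
for length in (500, 4000):  # two different input lengths, same code path
    x = rng.standard_normal(length)
    y = residual_block(x, w1, w2)
    feat = global_average_pool(y)
```

Because the classifier head only ever sees the pooled feature, the same weights serve spectra of any length — which is why ResNet1D needs no reshape logic.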
+
+ ## 📁 Project Structure (Cleaned and Current)
+
+ ```text
+ polymer_project/
+ ├── datasets/rdwp        # Raman spectra
+ ├── models/              # Model architectures
+ ├── scripts/             # Training, inference, utilities
+ ├── outputs/             # Artifacts: models, logs, plots
+ ├── docs/                # Documentation & reports
+ └── environment.yml      # (local) Conda execution environment
+ ```
+
+ ---
+
+ ## ✅ Current Status
+
+ | Track | Status | Test Accuracy |
+ |-----------|----------------------|----------------|
+ | **Raman** | ✅ Active & validated | **87.81% ± 7.59%** |
+ | **FTIR** | ⏸️ Deferred (modeling only) | N/A |
+
+ **Note:** FTIR preprocessing scripts are preserved but inactive. Modeling work is deferred until a suitable architecture is identified.
+
+ **Artifacts:**
+
+ - `outputs/figure2_model.pth`
+ - `outputs/resnet_model.pth`
+ - `outputs/logs/raman_{model}_diagnostics.json`
+
+ ---
+
+ ## 🔬 Key Features
+
+ - ✅ 10-Fold Stratified Cross-Validation
+ - ✅ CLI Training: `train_model.py`
+ - ✅ CLI Inference: `run_inference.py`
+ - ✅ Output artifact naming per model
+ - ✅ Raman-only preprocessing with baseline correction, smoothing, normalization
+ - ✅ Structured diagnostics JSON (accuracies, confusion matrices)
+ - ✅ Canonical validation script (`validate_pipeline.sh`) confirms reproducibility of all core components
+
+ ---
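The preprocessing chain named above (baseline correction, smoothing, normalization) lives in `scripts/preprocess_dataset.py`, which is not shown in this commit. As a rough sketch of what those three steps typically mean for Raman data — assuming a polynomial baseline fit, a Savitzky-Golay smoother, and min-max scaling, which may differ from the repository's exact choices:

```python
import numpy as np
from scipy.signal import savgol_filter

def preprocess(x, y, target_len=500):
    # Resample onto a uniform grid of target_len points
    grid = np.linspace(x.min(), x.max(), target_len)
    y = np.interp(grid, x, y)
    # Baseline correction: subtract a low-order polynomial trend
    coeffs = np.polyfit(grid, y, deg=3)
    y = y - np.polyval(coeffs, grid)
    # Smoothing: Savitzky-Golay filter preserves peak shape better than a moving average
    y = savgol_filter(y, window_length=11, polyorder=3)
    # Normalization: scale intensities to [0, 1]
    span = y.max() - y.min()
    return (y - y.min()) / span if span > 0 else y

x = np.linspace(200, 3500, 1200)                 # wavenumber axis (cm^-1)
y = np.exp(-((x - 1700) / 30) ** 2) + 0.001 * x  # synthetic peak on a sloping baseline
z = preprocess(x, y)
```

Each step maps onto a CLI flag of `train_model.py` (`--baseline`, `--smooth`, `--normalize`), so the same chain can be reproduced at training and inference time.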
+
+ ## 🔀 Branching Strategy
+
+ | Branch | Purpose |
+ |--------|---------|
+ | `main` | Local development (CPU) |
+ | `hpc_main` | Cluster-ready (HPC; GPU) |
+
+ **Environments:**
+
+ ```bash
+
+ # Local
+ git checkout main
+ conda env create -f environment.yml
+ conda activate polymer_env
+
+ # HPC
+ git checkout hpc_main
+ conda env create -f environment_hpc.yml
+ conda activate polymer_env
+ ```
+
+ ## 📊 Sample Training & Inference
+
+ ### Training (10-Fold CV)
+
+ ```bash
+
+ python scripts/train_model.py --model resnet --target-len 4000 --baseline --smooth --normalize
+ ```
+
+ ### Inference (Raman)
+
+ ```bash
+
+ python scripts/run_inference.py --target-len 4000 \
+   --input datasets/rdwp/sample123.txt --model outputs/resnet_model.pth \
+   --output outputs/inference/prediction.txt
+ ```
+
+ ### Inference Output Example
+
+ ```text
+ Predicted Label: 1    True Label: 1
+ Raw Logits: [[-569.544, 427.996]]
+ ```
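The raw logits in the example output above are turned into a label by taking the argmax; the Streamlit app in this commit additionally maps the absolute logit margin to a confidence description. A standalone sketch of that interpretation (using the example's own logits):

```python
import numpy as np

label_map = {0: "Stable (Unweathered)", 1: "Weathered (Degraded)"}
logits = np.array([-569.544, 427.996])  # raw logits from the example output above

# Numerically stable softmax: subtract the max before exponentiating,
# since exp(427.996) would overflow on its own
shifted = logits - logits.max()
probs = np.exp(shifted) / np.exp(shifted).sum()

pred = int(np.argmax(logits))
margin = abs(logits[0] - logits[1])
print(label_map[pred], round(margin, 2))
```

With a margin this large, the softmax saturates: essentially all probability mass lands on the predicted class, which is why the app reports confidence from the margin rather than from the (uninformative) probabilities.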
+
+ ### Validation Script (Raman Pipeline)
+
+ ```bash
+ ./validate_pipeline.sh
+ # Runs preprocessing, training, inference, and plotting checks
+ # Confirms artifact integrity and logs test results
+ ```
+
+ ---
+
+ ## 📚 Dataset Resources
+
+ | Type | Dataset | Source |
+ |-------|---------|--------|
+ | Raman | RDWP | [A Raman database of microplastics weathered under natural environments](https://data.mendeley.com/datasets/kpygrf9fg6/1) |
+
+ Datasets should be downloaded separately and placed here:
+
+ ```text
+ datasets/
+ └── rdwp/
+     ├── sample1.txt
+     ├── sample2.txt
+     └── ...
+ ```
+
+ These files are intentionally excluded from version control via `.gitignore`.
+
+ ---
+
+ ## 🛠 Dependencies
+
+ - Python 3.10+
+ - Conda, Git
+ - PyTorch (CPU & CUDA)
+ - NumPy, SciPy, Pandas
+ - Scikit-learn
+ - Matplotlib, Seaborn
+ - `argparse`, `json` (standard library)
+
+ ---
+
+ ## 🧑‍🤝‍🧑 Contributors
+
+ - **Jaser H.** — AIRE 2025 Intern, Developer
+ - **Dr. Kuppannagari** — Research Mentor
+
+ ---
+
+ ## 🚧 Next Steps
+
+ - 🔍 Review diagnostics logs and summarize results in reports
+ - 🔬 Conduct small-scale hyperparameter sweeps
+ - 📈 Build visual tools and presentation assets for showcase-ready delivery
+ - 🪪 Prepare presentation-ready visuals and model cards for final reporting
+ - ✅ Canonical validation completed (`@validation-loop-complete`)
app/ui_app.py ADDED
@@ -0,0 +1,347 @@
+ import os
+ import sys
+
+ # Project base path
+ BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+ sys.path.append(BASE_DIR)
+
+ from models.figure2_cnn import Figure2CNN
+ from models.resnet_cnn import ResNet1D
+ from scripts.preprocess_dataset import resample_spectrum
+
+ from io import StringIO
+ from glob import glob
+ from pathlib import Path
+ import numpy as np
+ import streamlit as st
+ import torch
+ import matplotlib.pyplot as plt
+
+
+
+ # Label map and label extractor
+ label_map = {0: "Stable (Unweathered)", 1: "Weathered (Degraded)"}
+
+ def label_file(filename: str) -> int:
+     name = Path(filename).name.lower()
+     if name.startswith("sta"):
+         return 0
+     elif name.startswith("wea"):
+         return 1
+     else:
+         raise ValueError("Unknown label pattern")
+
+ # Page configuration
+ st.set_page_config(
+     page_title="Polymer Aging Inference",
+     initial_sidebar_state="collapsed",
+     page_icon="🔬",
+     layout="wide")
+
+
+ # Reset status if nothing is uploaded
+ if 'uploaded_file' not in st.session_state:
+     st.session_state.status_message = "Awaiting input..."
+     st.session_state.status_type = "info"
+
+ # Title and caption
+ st.markdown("**🧪 Raman Spectrum Classifier**")
+ st.caption("AI-driven classification of polymer degradation using Raman spectroscopy.")
+
+ # Sidebar
+ with st.sidebar:
+     st.header("ℹ️ About This App")
+     st.markdown("""
+     Part of the **AIRE 2025 Internship Project**:
+     `AI-Driven Polymer Aging Prediction and Classification`
+
+     Uses Raman spectra and deep learning to predict material degradation.
+
+     **Author**: Jaser Hasan
+     **Mentor**: Dr. Sanmukh Kuppannagari
+     [🔗 GitHub](https://github.com/dev-jaser/ai-ml-polymer-aging-prediction)
+     """)
+
+ # Metadata for visual badges and metrics
+ model_metadata = {
+     "Figure2CNN (Baseline)": {
+         "emoji": "🔬",
+         "description": "Baseline CNN with standard filters",
+         "accuracy": "94.80%",
+         "f1": "94.30%"
+     },
+     "ResNet1D (Advanced)": {
+         "emoji": "🧠",
+         "description": "Residual CNN with deeper feature learning",
+         "accuracy": "96.20%",
+         "f1": "95.90%"
+     }
+ }
+
+ model_config = {
+     "Figure2CNN (Baseline)": {
+         "model_class": Figure2CNN,
+         "model_path": "outputs/figure2_model.pth"
+     },
+     "ResNet1D (Advanced)": {
+         "model_class": ResNet1D,
+         "model_path": "outputs/resnet_model.pth"
+     }
+ }
+
+ col1, col2 = st.columns([1.1, 2], gap="large")  # optional for cleaner spacing
+
+ try:
+     with col1:
+         # 📊 Upload + Model Selection
+         st.markdown("**📁 Upload Spectrum**")
+
+         # [NEW POSITION] 🧠 Model Selection grounded near data input
+         with st.container():
+             st.markdown("**🧠 Model Selection**")
+             # Enhanced model selector
+             model_labels = [
+                 f"{model_metadata[name]['emoji']} {name}" for name in model_config.keys()
+             ]
+             selected_label = st.selectbox(
+                 "Choose model architecture:",
+                 model_labels,
+                 key="model_selector"
+             )
+             model_choice = selected_label.split(" ", 1)[1]
+             with st.container():
+                 meta = model_metadata[model_choice]
+                 st.markdown(f"""
+                 **📈 Model Overview**
+                 *{meta['description']}*
+
+                 - **Accuracy**: `{meta['accuracy']}`
+                 - **F1 Score**: `{meta['f1']}`
+                 """)
+
+
+         # Model path & check
+         # [PATCH] Use selected model config
+         MODEL_PATH = model_config[model_choice]["model_path"]
+         MODEL_EXISTS = Path(MODEL_PATH).exists()
+         TARGET_LEN = 500
+
+         if not MODEL_EXISTS:
+             st.error("🚫 Model file not found. Please train the model first.")
+         tab1, tab2 = st.tabs(["Upload File", "Use Sample"])
+         with tab1:
+             uploaded_file = st.file_uploader("Upload Raman `.txt` spectrum", type="txt")
+         with tab2:
+             sample_files = sorted(glob("app/sample_spectra/*.txt"))
+             sample_options = ["-- Select --"] + sample_files
+             selected_sample = st.selectbox("Choose a sample:", sample_options)
+             if selected_sample != "-- Select --":
+                 with open(selected_sample, "r", encoding="utf-8") as f:
+                     file_contents = f.read()
+                 uploaded_file = StringIO(file_contents)
+                 uploaded_file.name = os.path.basename(selected_sample)
+
+         # Capture file in session
+         if uploaded_file is not None:
+             st.session_state['uploaded_file'] = uploaded_file
+             st.session_state['filename'] = uploaded_file.name
+             st.session_state.status_message = f"📁 File `{uploaded_file.name}` loaded. Ready to infer."
+             st.session_state.status_type = "success"
+             st.session_state.inference_run_once = False
+
+         # Status banner
+         st.markdown("**🚦 Pipeline Status**")
+         status_msg = st.session_state.get("status_message", "Awaiting input...")
+         status_typ = st.session_state.get("status_type", "info")
+         if status_typ == "success":
+             st.success(status_msg)
+         elif status_typ == "error":
+             st.error(status_msg)
+         else:
+             st.info(status_msg)
+
+         # Inference trigger
+         if st.button("▶️ Run Inference") and 'uploaded_file' in st.session_state and MODEL_EXISTS:
+             spectrum_name = st.session_state['filename']
+             uploaded_file = st.session_state['uploaded_file']
+             uploaded_file.seek(0)
+             raw_data = uploaded_file.read()
+             raw_text = raw_data.decode("utf-8") if isinstance(raw_data, bytes) else raw_data
+
+             # Parse spectrum
+             x_vals, y_vals = [], []
+             for line in raw_text.splitlines():
+                 parts = line.strip().replace(",", " ").split()
+                 numbers = [p for p in parts if p.replace('.', '', 1).replace('-', '', 1).isdigit()]
+                 if len(numbers) >= 2:
+                     try:
+                         x, y = float(numbers[0]), float(numbers[1])
+                         x_vals.append(x)
+                         y_vals.append(y)
+                     except ValueError:
+                         continue
+
+             x_raw = np.array(x_vals)
+             y_raw = np.array(y_vals)
+             y_resampled = resample_spectrum(x_raw, y_raw, TARGET_LEN)
+             st.session_state['x_raw'] = x_raw
+             st.session_state['y_raw'] = y_raw
+             st.session_state['y_resampled'] = y_resampled
+
+             # ---
+
+             # Update banner for inference
+             st.session_state.status_message = f"🔍 Inference running on: `{spectrum_name}`"
+             st.session_state.status_type = "info"
+             st.session_state.inference_run_once = True
+
+
+     # Inference
+
+     with col2:
+         if st.session_state.get("inference_run_once", False):
+             # Re-derive the filename from session state, so reruns that skip the
+             # button branch (e.g. switching tabs) do not raise a NameError
+             spectrum_name = st.session_state.get('filename', 'unknown')
+             # Plot: Raw + Resampled
+             x_raw = st.session_state.get("x_raw", None)
+             y_raw = st.session_state.get("y_raw", None)
+             y_resampled = st.session_state.get("y_resampled", None)
+             if x_raw is not None and y_raw is not None and y_resampled is not None:
+                 st.subheader("📉 Spectrum Overview")
+                 st.write("")  # Spacer line for visual breathing room
+                 from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
+                 from PIL import Image
+                 import io
+
+                 # Create smaller figure
+                 fig, ax = plt.subplots(1, 2, figsize=(8, 2.5), dpi=150)
+                 ax[0].plot(x_raw, y_raw, label="Raw", color="dimgray")
+                 ax[0].set_title("Raw Input")
+                 ax[0].set_xlabel("Wavenumber")
+                 ax[0].set_ylabel("Intensity")
+                 ax[0].legend()
+
+                 ax[1].plot(np.linspace(min(x_raw), max(x_raw), TARGET_LEN), y_resampled, label="Resampled", color="steelblue")
+                 ax[1].set_title("Resampled")
+                 ax[1].set_xlabel("Wavenumber")
+                 ax[1].set_ylabel("Intensity")
+                 ax[1].legend()
+
+                 plt.tight_layout()
+
+                 # Render to image buffer
+                 canvas = FigureCanvas(fig)
+                 buf = io.BytesIO()
+                 canvas.print_png(buf)
+                 buf.seek(0)
+
+                 # Display fixed-size image
+                 st.image(Image.open(buf), caption="Raw vs. Resampled Spectrum", width=880)
+
+
+             st.session_state['x_raw'] = x_raw
+             st.session_state['y_raw'] = y_raw
+
+             y_resampled = st.session_state.get('y_resampled', None)
+             if y_resampled is None:
+                 st.error("❌ Error: Missing resampled spectrum. Please upload and run inference.")
+                 st.stop()
+             input_tensor = torch.tensor(y_resampled, dtype=torch.float32).unsqueeze(0).unsqueeze(0)
+             # [PATCH] Load selected model
+             ModelClass = model_config[model_choice]["model_class"]
+             model = ModelClass(input_length=TARGET_LEN)
+
+             model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu"), strict=False)
+             model.eval()
+             with torch.no_grad():
+                 logits = model(input_tensor)
+                 prediction = torch.argmax(logits, dim=1).item()
+                 logits_list = logits.numpy().tolist()[0]
+             try:
+                 true_label_idx = label_file(spectrum_name)
+                 true_label_str = label_map[true_label_idx]
+             except Exception:
+                 true_label_idx = None
+                 true_label_str = "Unknown"
+             predicted_class = label_map.get(prediction, f"Class {prediction}")
+
+             import torch.nn.functional as F
+             probs = F.softmax(torch.tensor(logits_list), dim=0).numpy()
+
+             # 🔬 Redesigned Prediction Block – Distinguishing Model vs Classification
+             tab_summary, tab_logits, tab_system, tab_explainer = st.tabs([
+                 "🧠 Model Summary", "🔬 Logits", "⚙️ System Info", "📘 Explanation"])
+
+
+             with tab_summary:
+                 st.markdown("### 🧠 AI Model Decision Summary")
+                 st.markdown(f"""
+                 **📃 File Analyzed:** `{spectrum_name}`
+
+                 **🛠️ Model Chosen:** `{model_choice}`
+                 """)
+                 st.markdown("**🔍 Internal Model Prediction**")
+                 st.write(f"The model believes this sample best matches: **`{predicted_class}`**")
+                 if true_label_idx is not None:
+                     st.caption(f"Ground Truth Label: `{true_label_str}`")
+
+                 logit_margin = abs(logits_list[0] - logits_list[1])
+                 if logit_margin > 1000:
+                     strength_desc = "VERY STRONG"
+                 elif logit_margin > 250:
+                     strength_desc = "STRONG"
+                 elif logit_margin > 100:
+                     strength_desc = "MODERATE"
+                 else:
+                     strength_desc = "UNCERTAIN"
+
+                 st.markdown("🧪 Final Classification")
+                 st.markdown("**📊 Model Confidence Estimate**")
+                 st.write(f"**Decision Confidence:** `{strength_desc}` (margin = `{logit_margin:.1f}`)")
+                 st.success(f"This spectrum is classified as: **`{predicted_class}`**")
+
+             with tab_logits:
+                 st.markdown("🔬 View Internal Model Output (Logits)")
+                 st.markdown("""
+                 These are the **raw output scores** from the model before making a final prediction.
+
+                 Higher scores indicate stronger alignment between the input spectrum and that class.
+                 """)
+                 st.json({
+                     label_map.get(i, f"Class {i}"): float(score)
+                     for i, score in enumerate(logits_list)
+                 })
+
+             with tab_system:
+                 st.markdown("⚙️ View System Info")
+                 st.json({
+                     "Model Chosen": model_choice,
+                     "Spectrum Length": TARGET_LEN,
+                     "Processing Steps": "Raw Signal → Resampled → Inference"
+                 })
+
+             with tab_explainer:
+                 st.markdown("📘 What Just Happened?")
+                 st.markdown("""
+                 **🔍 Process Overview**
+                 1. 🗂 A Raman spectrum was uploaded
+                 2. 📏 Data was standardized
+                 3. 🤖 AI model analyzed the spectrum
+                 4. 📌 A classification was made
+
+                 ---
+                 **🧠 How the Model Operates**
+
+                 Trained on known polymer conditions, the system detects spectral patterns
+                 indicative of stable or weathered polymers.
+
+                 ---
+                 **✅ Why It Matters**
+
+                 Enables:
+                 - 🔬 Material longevity research
+                 - 🔁 Recycling assessments
+                 - 🌱 Sustainability decisions
+                 """)
+
+ except (ValueError, TypeError, RuntimeError) as e:
+     st.error(f"❌ Inference error: {e}")
backend/.gitignore ADDED
@@ -0,0 +1 @@
+ __pycache__/
backend/inference_utils.py ADDED
@@ -0,0 +1,79 @@
+ def load_model(name):
+     return "mock_model"
+
+ def run_inference(model, spectrum):
+     return {
+         "prediction": "Stubbed Output",
+         "class_index": 0,
+         "logits": [0.0, 1.0],
+         "class_labels": ["Stub", "Output"]
+     }
+
+
+ # ---------- ACTUAL MODEL LOADING/INFERENCE CODE ---------------------|
+ # import torch
+ # import numpy as np
+ # from pathlib import Path
+ # from scripts.preprocess_dataset import resample_spectrum
+ # from models.figure2_cnn import Figure2CNN
+ # from models.resnet_cnn import ResNet1D
+
+ # # -- Label Map --
+ # LABELS = ["Stable (Unweathered)", "Weathered (Degraded)"]
+
+ # # -- Model Paths --
+ # MODEL_CONFIG = {
+ #     "figure2": {
+ #         "class": Figure2CNN,
+ #         "path": "outputs/figure2_model.pth"
+ #     },
+ #     "resnet": {
+ #         "class": ResNet1D,
+ #         "path": "outputs/resnet_model.pth"
+ #     }
+ # }
+
+ # def load_model(model_name: str):
+ #     if model_name not in MODEL_CONFIG:
+ #         raise ValueError(f"Unknown model '{model_name}'. Valid options: {list(MODEL_CONFIG.keys())}")
+
+ #     config = MODEL_CONFIG[model_name]
+ #     model = config["class"]()
+ #     state_dict = torch.load(config["path"], map_location=torch.device("cpu"), weights_only=True)
+ #     model.load_state_dict(state_dict)
+ #     model.eval()
+ #     return model
+
+ # def run_inference(model, spectrum: list):
+ #     # -- Validate Input --
+ #     if not isinstance(spectrum, list) or len(spectrum) < 10:
+ #         raise ValueError("Spectrum must be a list of floats with reasonable length")
+
+ #     # -- Convert to Numpy --
+ #     spectrum = np.array(spectrum, dtype=np.float32)
+
+ #     # -- Resample --
+ #     x_vals = np.arange(len(spectrum))
+ #     spectrum = resample_spectrum(x_vals, spectrum, target_len=500)
+
+ #     # -- Normalize --
+ #     mean = np.mean(spectrum)
+ #     std = np.std(spectrum)
+ #     if std == 0:
+ #         raise ValueError("Standard deviation of spectrum is zero; normalization will fail.")
+ #     spectrum = (spectrum - mean) / std
+
+ #     # -- To Tensor --
+ #     x = torch.tensor(spectrum, dtype=torch.float32).unsqueeze(0).unsqueeze(0)  # Shape (1, 1, 500)
68
+
69
+ # with torch.no_grad():
70
+ # logits = model(x)
71
+ # pred_index = torch.argmax(logits, dim=1).item()
72
+
73
+ # return {
74
+ # "prediction": LABELS[pred_index],
75
+ # "class_index": pred_index,
76
+ # "logits": logits.squeeze().tolist(),
77
+ # "class_labels": LABELS
78
+ # }
79
+ # ---------- ACTUAL MODEL LOADING/INFERENCE CODE ---------------------|
backend/main.py ADDED
@@ -0,0 +1,34 @@
1
+ # from fastapi import FastAPI, HTTPException
2
+ from fastapi import FastAPI
3
+ from pydantic import BaseModel
4
+ # import torch
5
+
6
+ # from backend.inference_utils import load_model, run_inference
7
+
8
+ # -- FastAPI app --
9
+ app = FastAPI()
10
+
11
+ # -- Input Schema --
12
+ class InferenceRequest(BaseModel):
13
+ model_name: str
14
+ spectrum: list[float]
15
+
16
+ @app.get("/")
17
+ def root():
18
+ return {"message": "Polymer Aging Inference API is online"}
19
+
20
+ @app.post("/infer")
21
+ def infer(request: InferenceRequest):
22
+ return {
23
+ "prediction": "Stubbed Output",
24
+ "class_index": 0,
25
+ "logits": [0.0, 1.0],
26
+ "class_labels": ["Stub", "Output"],
27
+ }
28
+ # def infer(request: InferenceRequest):
29
+ # try:
30
+ # model = load_model(request.model_name)
31
+ # result = run_inference(model, request.spectrum)
32
+ # return result
33
+ # except Exception as e:
34
+ # raise HTTPException(status_code=500, detail=str(e)) from e
docs/BACKEND_MIGRATION_LOG.md ADDED
@@ -0,0 +1,60 @@
1
+ # BACKEND_MIGRATION_LOG.md
2
+
3
+ ## 📌 Overview
4
+
5
+ This document tracks the migration of the inference logic from a monolithic Streamlit app to a modular, testable FastAPI backend for the Polymer AI Aging Prediction System.
6
+
7
+ ---
8
+
9
+ ## ✅ Completed Work
10
+
11
+ ### 1. Initial Setup
12
+
13
+ - Installed `fastapi` and `uvicorn`, and set up a basic FastAPI app in `main.py`.
14
+
15
+ ### 2. Modular Inference Utilities
16
+
17
+ - Moved `load_model()` and `run_inference()` into `backend/inference_utils.py`.
18
+ - Separated model configuration for Figure2CNN and ResNet1D.
19
+ - Applied proper preprocessing (resampling, normalization) inside `run_inference()`.
20
+
21
+ ### 3. API Endpoint
22
+
23
+ - `/infer` route accepts JSON payloads with `model_name` and `spectrum`.
24
+ - Returns: full prediction dictionary with class index, logits, and label map.
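A hypothetical payload sketch consistent with the `InferenceRequest` schema (`model_name`, `spectrum`) and the response keys returned by `run_inference()` — field values are illustrative, not taken from the repo's actual test payload:

```python
import json

# Example /infer payload; field names follow the InferenceRequest schema.
payload = {
    "model_name": "figure2",                    # or "resnet"
    "spectrum": [0.1 * i for i in range(500)],  # illustrative values
}
body = json.dumps(payload).encode("utf-8")      # what curl -d would send

# The response mirrors run_inference()'s dictionary:
expected_keys = {"prediction", "class_index", "logits", "class_labels"}
```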
25
+
26
+ ### 4. Validation + Testing
27
+
28
+ - Tested manually in Python REPL.
29
+ - Tested via `curl`:
30
+
31
+ ```bash
32
+ curl -X POST http://127.0.0.1:8000/infer -H "Content-Type: application/json" -d @backend/test_payload.json
33
+ ```
34
+
35
+ ---
36
+
37
+ ## 🛠 Fixes & Breakpoints Resolved
38
+
39
+ - ✅ Fixed incorrect model path ("models/" → "outputs/")
40
+ - ✅ Corrected unpacking bug in `main.py` → now returns full result dict
41
+ - ✅ Replaced invalid `tolist()` call on string-typed logits
42
+ - ✅ Manually verified output from CLI and curl
43
+
44
+ ---
45
+
46
+ ## 🧪 Next Focus: Robustness Testing
47
+
48
+ - Invalid `model_name` handling
49
+ - Short/empty spectrum validation
50
+ - ResNet model loading test
51
+ - JSON schema validation for input
52
+ - Unit tests via `pytest` or integration test runner
53
+
54
+ ---
55
+
56
+ ## 🔄 Future Enhancements
57
+
58
+ - Modular model registry (for adding more model classes easily)
59
+ - Add OpenAPI schema and example payloads for documentation
60
+ - Enable batch inference or upload support
docs/ENVIRONMENT_GUIDE.md ADDED
@@ -0,0 +1,119 @@
1
+ # 🔧 Environment Management Guide
2
+
3
+ ## AI-Driven Polymer Aging Prediction and Classification System
4
+
5
+ **Maintainer:** Jaser Hasan
6
+ **Snapshot:** `@artifact-isolation-complete`
7
+ **Last Updated:** 2025-06-26
8
+ **Environments:** Conda (local) + venv on `/scratch` (HPC)
9
+
10
+ ---
11
+
12
+ ## 🧠 Overview
13
+
14
+ This guide describes how to set up and activate the Python environments required to run the Raman pipeline on both:
15
+
16
+ - **Local Systems:** (Mac/Windows/Linux)
17
+ - **CWRU Pioneer HPC:** (GPU nodes, venv based)
18
+
19
+ This guide documents the environment structure and the divergence between the **local Conda environment (`polymer_env`)** and the **HPC Python virtual environment (`polymer_venv`)**.
20
+
21
+ ---
22
+
23
+ ## 📁 Environment Overview
24
+
25
+ | Platform | Environment | Manager | Path | Notes |
26
+ |----------|-------------|---------|------|-------|
27
+ | Local (dev) | `polymer_env` | **Conda** | `~/miniconda3/envs/polymer_env` | Primary for day-to-day development |
28
+ | HPC (Pioneer) | `polymer_venv` | **venv** (Python stdlib) | `/scratch/users/<case_id>/polymer_project/polymer_venv` | Created under `/scratch` to avoid `/home` quota limits |
29
+
30
+ ---
31
+
32
+ ## 💻 Local Installation (Conda)
33
+
34
+ ```bash
35
+
36
+ git clone https://github.com/dev-jaser/ai-ml-polymer-aging-prediction.git
37
+ cd ai-ml-polymer-aging-prediction
38
+ conda env create -f environment.yml
39
+ conda activate polymer_env
40
+ python -c "import torch, sys; print('PyTorch:', torch.__version__, 'Python', sys.version)"
41
+ ```
42
+
43
+ > **Tip:** Keep Conda updated (`conda update conda`) to reduce solver errors.
44
+
45
+ ---
46
+
47
+ ## 🚀 CWRU Pioneer HPC Setup (venv + pip)
48
+
49
+ > Conda is intentionally **not** used on Pioneer due to prior codec and disk-quota issues.
50
+
51
+ ### 1. Load Python Module
52
+
53
+ ```bash
54
+
55
+ module purge
56
+ module load Python/3.12.3-GCCcore-13.2.0
57
+ ```
58
+
59
+ ### 2. Create Working Directory in `/scratch`
60
+
61
+ ```bash
62
+
63
+ mkdir -p /scratch/users/<case_id>/polymer_project_runtime
64
+ cd /scratch/users/<case_id>/polymer_project_runtime
65
+ git clone https://github.com/dev-jaser/ai-ml-polymer-aging-prediction.git
66
+ ```
67
+
68
+ ### 3. Create & Activate Virtual Environment
69
+
70
+ ```bash
71
+
72
+ python3 -m venv polymer_venv
73
+ source polymer_venv/bin/activate
74
+ ```
75
+
76
+ ### 4. Install Dependencies
77
+
78
+ ```bash
79
+
80
+ pip install --upgrade pip
81
+ pip install -r environment_hpc.yml # Optimized dependencies list for Pioneer
82
+ ```
83
+
84
+ (Optional) Save a reproducible freeze:
85
+
86
+ ```bash
87
+
88
+ pip freeze > requirements_hpc.txt
89
+ ```
90
+
91
+ ---
92
+
93
+ ## ✅ Supported CLI Workflows (Raman-only)
94
+
95
+ | Script | Purpose |
96
+ |--------|---------|
97
+ | `scripts/train_model.py` | 10-fold CV training (`--model figure2` or `--model resnet`) |
98
+ | `scripts/run_inference.py` | Predict single Raman spectrum |
99
+ | `scripts/preprocess_dataset.py` | Apply full preprocessing chain |
100
+ | `scripts/plot_spectrum.py` | Quick spectrum visualization (.png) |
101
+
102
+ > FTIR-related scripts are archived and *not installed* into the active environments.
103
+
104
+ ---
105
+
106
+ ## 🔁 Cross-Environment Parity
107
+
108
+ - Package sets in `environment.yml` and `environment_hpc.yml` are aligned.
109
+ - Diagnostics JSON structure and checkpoint filenames are identical on both systems.
110
+ - Training commands are copy-paste compatible between local shell and HPC login shell.
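Package parity between the two environment definitions can be spot-checked with a short script (hypothetical helper, not part of the repo) that diffs two `pip freeze` outputs:

```python
# Compare two `pip freeze` outputs and report packages unique to each
# environment (version pins ignored, names lower-cased).
def freeze_diff(local_lines, hpc_lines):
    def name(line):
        return line.split("==")[0].strip().lower()
    local = {name(l) for l in local_lines if l.strip()}
    hpc = {name(l) for l in hpc_lines if l.strip()}
    return sorted(local - hpc), sorted(hpc - local)

only_local, only_hpc = freeze_diff(
    ["torch==2.2.0", "numpy==1.26.4"], ["numpy==1.26.4"]
)  # -> (["torch"], [])
```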
111
+
112
+ ---
113
+
114
+ ## 📦 Best Practices
115
+
116
+ - **Local:** use Conda for rapid iteration, notebook work, small CPU inference.
117
+ - **HPC:** use venv in `/scratch` for GPU training; never install large packages into `/home` (`~/`).
118
+ - Keep environments lightweight; remove unused libraries to minimize rebuild time.
119
+ - Update this guide if either environment definition changes.
docs/HPC_REMOTE_SETUP.md ADDED
@@ -0,0 +1,111 @@
1
+ # Accessing CWRU Pioneer HPC System Remotely via SSH (PuTTY)
2
+
3
+ ## Step 1: Set up DUO Authentication for VPN Access
4
+
5
+ ### 1. Enroll in DUO (if not already done):
6
+
7
+ > - Go to [case.edu/utech/duo](https://case.edu/utech/duo) and follow instructions to register your device (phone/tablet/hardware token)
8
+ > - This is required for FortiClient VPN authentication.
9
+
10
+ ---
11
+
12
+ ## Step 2: Install and Configure FortiClient VPN
13
+
14
+ ### 1. Download FortiClient VPN:
15
+
16
+ - Visit [case.edu/utech/help/forticlient-vpn](https://case.edu/utech/help/forticlient-vpn)
17
+ - Download the **FortiClient VPN** software for your specific device.
18
+
19
+ ### 2. Install & Configure VPN
20
+
21
+ - Run the installer and complete setup
22
+ - Open FortiClient and configure new connection:
23
+ - **Connection Name**: `CWRU VPN` (or any name)
24
+ - **Remote Gateway**: `vpn.case.edu`
25
+ - **Customize Port**: `443`
26
+ - Enable "**Save Credentials**" (optional)
27
+ - Click **Save**
28
+
29
+ ### 3. Connect to VPN:
30
+
31
+ - Enter your **CWRU Network ID** (e.g., `jxh369`) and password.
32
+ - Complete **DUO two-factor authentication** when prompted (approve via phone/device)
33
+ - Once connected, you'll see a confirmation message.
34
+
35
+ ---
36
+
37
+ ## Step 3: Install PuTTY (SSH Client)
38
+
39
+ ### 1. Download PuTTY:
40
+
41
+ - If not installed, download from [https://www.putty.org](https://www.putty.org)
42
+ - Run the installer (or use the portable version).
43
+
44
+ ### 2. Open PuTTY:
45
+
46
+ - Launch PuTTY from the Start Menu
47
+
48
+ ---
49
+
50
+ ## Step 4: Configure PuTTY for Pioneer HPC
51
+
52
+ ### 1. Enter Connection Details:
53
+
54
+ - **Host Name (or IP address)**: `pioneer.case.edu`
55
+ - **Port**: `22`
56
+ - **Connection Type**: SSH
57
+
58
+ ### 2. Optional: Save Session (for future use):
59
+
60
+ - Under "**Saved Sessions**", type `Pioneer HPC` and click **Save**
61
+
62
+ ### 3. Click "Open" to initiate the connection
63
+
64
+ ---
65
+
66
+ ## Step 5: Log In via SSH
67
+
68
+ ### 1. Enter Credentials:
69
+
70
+ - When prompted, enter your **CWRU Network ID** (e.g., `jxh369`)
71
+ - Enter your password (same as VPN/CWRU login)
72
+ - Complete DUO authentication again if required
73
+
74
+ ### 2. Successful Login:
75
+
76
+ - You should now see the **Pioneer HPC command-line interface**
77
+
78
+ ---
79
+
80
+ ## Step 6: Disconnecting
81
+
82
+ ### 1. Exit SSH Session:
83
+
84
+ - Type `exit` or `logout` in the terminal
85
+
86
+ ### 2. Disconnect VPN:
87
+
88
+ - Close PuTTY and disconnect FortiClient VPN when done.
89
+
90
+ ---
91
+
92
+ ## Troubleshooting Tips
93
+
94
+ ### VPN Fails?
95
+
96
+ - Ensure DUO is set up correctly
97
+ - Try reconnecting or restarting FortiClient VPN
98
+
99
+ ### PuTTY Connection Refused?
100
+
101
+ - Verify VPN is active (`vpn.case.edu` shows "**Connected**")
102
+ - Check `pioneer.case.edu` and port `22` are correct
103
+
104
+ ### DUO Not Prompting?
105
+
106
+ - Ensure your device is registered in DUO
107
+
108
+
109
+ ## Extra Help on CWRU HPC Systems
110
+
111
+ [https://sites.google.com/a/case.edu/hpcc/](https://sites.google.com/a/case.edu/hpcc/)
docs/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 dev-jaser
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
docs/PROJECT_TIMELINE.md ADDED
@@ -0,0 +1,156 @@
1
+ # 📅 PROJECT_TIMELINE.md
2
+
3
+ ## AI-Driven Polymer Aging Prediction and Classification System
4
+
5
+ **Intern:** Jaser Hasan
6
+
7
+ ### ✅ PHASE 1 – Project Kickoff and Faculty Guidance
8
+
9
+ **Tag:** `@project-init-complete`
10
+
11
+ Received first set of research tasks from Prof. Kuppannagari
12
+
13
+ - Received research plan
14
+ - Objectives defined: download datasets, analyze spectra, implement CNN, run initial inference
15
+
16
+ ---
17
+
18
+ ### ✅ PHASE 2 – Dataset Acquisition (Local System)
19
+
20
+ **Tag:** `@data-downloaded`
21
+
22
+ - Downloaded Raman `.txt` (RDWP) and FTIR `.csv` data (polymer packaging)
23
+ - Structured into:
24
+ - `datasets/rdwp`
25
+ - `datasets/ftir`
26
+
27
+ ---
28
+
29
+ ### ✅ PHASE 3 – Data Exploration & Spectral Validation
30
+
31
+ **Tag:** `@data-exploration-complete`
32
+
33
+ - Built plotting tools for Raman and FTIR
34
+ - Validated spectrum structure, removed malformed samples
35
+ - Observed structural inconsistencies in FTIR multi-layer grouping
36
+
37
+ ---
38
+
39
+ ### ✅ PHASE 4 – Preprocessing Pipeline Implementation
40
+
41
+ **Tag:** `@data-prep`
42
+
43
+ - Implemented `preprocess_dataset.py` for Raman
44
+ - Applied: Resampling -> Baseline correction -> Smoothing -> Normalization
45
+ - Confirmed reproducible input/output behavior and dynamic CLI control
46
+
47
+ ### ✅ PHASE 5 – Figure2CNN Architecture Build
48
+
49
+ **Tag:** `@figure2cnn-complete`
50
+
51
+ - Constructed `Figure2CNN` modeled after Figure 2 CNN from research paper
52
+ - `Figure2CNN`: 4 conv layers + 3 FC layers
53
+ - Verified dynamic input length handling (e.g., 500, 1000, 4000)
54
+
55
+ ---
56
+
57
+ ### ✅ PHASE 6 – Local Training and Inference
58
+
59
+ **Tag:** `@figure2cnn-training-local`
60
+
61
+ - Trained Raman models locally (FTIR now deferred)
62
+ - Canonical Raman accuracy: **87.29% ± 6.30%**
63
+ - FTIR accuracy results archived and excluded from current validation
64
+ - CLI tools for training, inference, plotting implemented
65
+
66
+ ---
67
+
68
+ ### ✅ PHASE 7 – Reproducibility and Documentation Setup
69
+
70
+ **Tag:** `@project-docs-started`
71
+
72
+ - Authored `README.md`, `PROJECT_REPORT.md`, and `ENVIRONMENT_GUIDE.md`
73
+ - Defined reproducibility guidelines
74
+ - Standardized project structure and versioning
75
+
76
+ ---
77
+
78
+ ### ✅ PHASE 8 – HPC Access and Venv Strategy
79
+
80
+ **Tag:** `@hpc-login-successful`
81
+
82
+ - Logged into CWRU Pioneer (SSH via PuTTY)
83
+ - Set up FortiClient VPN, which is required for remote access to Pioneer
84
+ - Explored module system; selected venv over Conda for compatibility
85
+ - Loaded Python 3.12.3 + created `polymer_env`
86
+
87
+ ---
88
+
89
+ ### ✅ PHASE 9 – HPC Environment Sync
90
+
91
+ **Tag:** `@venv-alignment-complete`
92
+
93
+ - Created `environment_hpc.yml`
94
+ - Installed dependencies into `polymer_env`
95
+ - Validated imports, PyTorch installation, and CLI script execution
96
+
97
+ ---
98
+
99
+ ### ✅ PHASE 10 – Full Instruction Validation on HPC
100
+
101
+ **Tag:** `@prof-k-instruction-validation-complete`
102
+
103
+ - Ran Raman preprocessing and plotting scripts
104
+ - Executed `run_inference.py` with CLI on raw Raman `.txt` file
105
+ - Verified consistent predictions and output logging across local and HPC
106
+
107
+ ---
108
+
109
+ ### ✅ PHASE 11 – FTIR Path Paused, Raman Declared Primary
110
+
111
+ **Tag:** `@raman-pipeline-focus-milestone`
112
+
113
+ - FTIR modeling formally deferred
114
+ - FTIR preprocessing scripts preserved and archived for future use
115
+ - All resources directed toward Raman pipeline finalization
116
+ - Saliency, FTIR ingestion, and `train_ftir_model.py` archived
117
+
118
+ ---
119
+
120
+ ### ✅ PHASE 12 – ResNet1D Prototyping & Benchmark Setup
121
+
122
+ **Tag:** `@resnet-prototype-complete`
123
+
124
+ - Built `ResNet1D` architecture in `models/resnet_cnn.py`
125
+ - Integrated `train_model.py` via `--model resnet`
126
+ - Ran initial CV training with successful results
127
+
128
+ ---
129
+
130
+ ### ✅ PHASE 13 – Output Artifact Isolation
131
+
132
+ **Tag:** `@artifact-isolation-complete`
133
+
134
+ - Patched `train_model.py` to save:
135
+ - `figure2_model.pth`, `resnet_model.pth`
136
+ - `raman_figure2_diagnostics.json`, `raman_resnet_diagnostics.json`
137
+ - Prevented all overwrites by tying output filenames to `args.model`
138
+ - Snapshotted as reproducibility milestone. Enabled downstream validation harness.
139
+
140
+ ### ✅ PHASE 14 – Canonical Validation Achieved
141
+
142
+ **Tag:** `@validation-loop-complete`
143
+
144
+ - Created `validate_pipeline.sh` to verify preprocessing, training, inference, plotting
145
+ - Ran full validation using `Figure2CNN` with reproducible CLI config
146
+ - All outputs verified: logs, artifacts, predictions, plots
147
+ - Declared Raman pipeline scientifically validated and stable
148
+
149
+ ---
150
+
151
+ ### ⏭️ NEXT - Results Analysis & Finalization
152
+
153
+ - Analyze logged diagnostics for both models
154
+ - Conduct optional hyperparameter tuning (batch size, LR)
155
+ - Begin deliverable prep: visuals, posters, cards
156
+ - Resume FTIR work only after the Raman path is fully stabilized and documented, and the open FTIR conceptual error is resolved
docs/REPRODUCIBILITY.md ADDED
@@ -0,0 +1,132 @@
1
+ # 📚 REPRODUCIBILITY.md
2
+
3
+ *AI-Driven Polymer Aging Prediction & Classification System*
4
+ *(Canonical Raman-only Pipeline)*
5
+
6
+ > **Purpose**
7
+ > A single document that lets any new user clone the repo, acquire the dataset, recreate the Conda environment, and generate the validated Raman pipeline artifacts.
8
+
9
+ ---
10
+
11
+ ## 1. System Requirements
12
+
13
+ | Component | Minimum Version | Notes |
14
+ |-----------|-----------------|-------|
15
+ | Python | 3.10+ | Conda recommended |
16
+ | Git | 2.30+ | Any modern version |
17
+ | Conda | 23.1+ | Mamba also fine |
18
+ | OS | Linux / MacOS / Windows | CPU run (no GPU needed) |
19
+ | Disk | ~1 GB | Dataset + artifacts |
20
+
21
+ ---
22
+
23
+ ## 2. Clone Repository
24
+
25
+ ```bash
26
+ git clone https://github.com/dev-jaser/ai-ml-polymer-aging-prediction.git
27
+ cd ai-ml-polymer-aging-prediction
28
+ git checkout main
29
+ ```
30
+
31
+ ---
32
+
33
+ ## 3. Create & Activate Conda Environment
34
+
35
+ ```bash
36
+ conda env create -f environment.yml
37
+ conda activate polymer_env
38
+ ```
39
+
40
+ > **Tip:** If you already created `polymer_env` just run `conda activate polymer_env`
41
+
42
+ ---
43
+
44
+ ## 4. Download RDWP Raman Dataset
45
+
46
+ 1. Visit https://data.mendeley.com/datasets/kpygrf9fg6/1
47
+ 2. Download the archive (**RDWP.zip or similar**) by clicking `Download All (10.3 MB)`
48
+ 3. Extract all `*.txt` Raman files into:
49
+
50
+ ```bash
51
+ ai-ml-polymer-aging-prediction/datasets/rdwp
52
+ ```
53
+
54
+ 4. Quick sanity check:
55
+
56
+ ```bash
57
+ ls datasets/rdwp | grep -c "\.txt$"   # -> 170+ files expected
58
+ ```
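As a further sanity check, both the `sta-` (stable) and `wea-` (weathered) filename prefixes used by the repo's labeling scripts should be present; a small sketch (hypothetical helper, operating on a filename list for simplicity):

```python
# Count stable vs. weathered spectra from a list of filenames; both
# classes must be non-empty for cross-validated training to make sense.
def class_counts(filenames):
    names = [n.lower() for n in filenames]
    return (sum(n.startswith("sta-") for n in names),
            sum(n.startswith("wea-") for n in names))

class_counts(["sta-PE01.txt", "wea-PP07.txt", "wea-PS03.txt"])  # -> (1, 2)
```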
59
+
60
+ ---
61
+
62
+ ## 5. Validate the Entire Pipeline
63
+
64
+ Run the canonical smoke-test harness:
65
+
66
+ ```bash
67
+ ./validate_pipeline.sh
68
+ ```
69
+
70
+ Successful run prints:
71
+
72
+ ```bash
73
+ [PASS] Preprocessing
74
+ [PASS] Training & artifacts
75
+ [PASS] Inference
76
+ [PASS] Plotting
77
+ All validation checks passed!
78
+ ```
79
+
80
+ Artifacts created:
81
+
82
+ ```bash
83
+ outputs/figure2_model.pth
84
+ outputs/logs/raman_figure2_diagnostics.json
85
+ outputs/inference/test_prediction.json
86
+ outputs/plots/validation_plot.png
87
+ ```
88
+
89
+ ---
90
+
91
+ ## 6. Optional: Train ResNet Variant
92
+
93
+ ```bash
94
+ python scripts/train_model.py --model resnet --target-len 4000 --baseline --smooth --normalize
95
+ ```
96
+
97
+ Check that these exist now:
98
+
99
+ ```bash
100
+ outputs/resnet_model.pth
101
+ outputs/logs/raman_resnet_diagnostics.json
102
+ ```
103
+
104
+ ---
105
+
106
+ ## 7. Clean-up & Re-Run
107
+
108
+ To re-run from a clean state:
109
+
110
+ ```bash
111
+ rm -rf outputs/*
112
+ ./validate_pipeline.sh
113
+ ```
114
+
115
+ All artifacts will be regenerated.
116
+
117
+ ---
118
+
119
+ ## 8. Troubleshooting
120
+
121
+ | Symptom | Likely Cause | Fix |
122
+ |---------|--------------|-----|
123
+ | `ModuleNotFoundError` during scripts| `conda activate polymer_env` not done | Activate env|
124
+ | `CUDA not available` warning | Running on CPU | Safe to ignore |
125
+ | Fewer than 170 files in `datasets/rdwp` | Incomplete extract | Re-download archive |
126
+ | `validate_pipeline.sh: Permission denied` | Missing executable bit | `chmod +x validate_pipeline.sh` |
127
+
128
+ ---
129
+
130
+ ## 9. Contact
131
+
132
+ For issues or questions, open an Issue in the GitHub repo or contact @dev-jaser
models/__init__.py ADDED
File without changes
models/figure2_cnn.py ADDED
@@ -0,0 +1,77 @@
1
+ # 📌 MODEL DESIGNATION:
2
+ # Figure2CNN is validated ONLY for RAMAN spectra input.
3
+ # Any use for FTIR modeling is invalid and deprecated.
4
+ # See milestone: @figure2cnn-raman-only-milestone
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+
9
+
10
+ class Figure2CNN(nn.Module):
11
+ """
12
+ CNN architecture based on Figure 2 of the referenced research paper.
13
+ Designed for 1D spectral data input of length 500
14
+ """
15
+
16
+ def __init__(self, input_length=500, input_channels=1):
17
+ super(Figure2CNN, self).__init__()
18
+
19
+ self.input_channels = input_channels
20
+
21
+
22
+ self.conv_block = nn.Sequential(
23
+ nn.Conv1d(input_channels, 16, kernel_size=5, padding=2),
24
+ nn.ReLU(),
25
+ nn.MaxPool1d(kernel_size=2),
26
+
27
+ nn.Conv1d(16, 32, kernel_size=5, padding=2),
28
+ nn.ReLU(),
29
+ nn.MaxPool1d(kernel_size=2),
30
+
31
+ nn.Conv1d(32, 64, kernel_size=5, padding=2),
32
+ nn.ReLU(),
33
+ nn.MaxPool1d(kernel_size=2),
34
+
35
+ nn.Conv1d(64, 128, kernel_size=5, padding=2),
36
+ nn.ReLU(),
37
+ nn.MaxPool1d(kernel_size=2),
38
+ )
39
+
40
+ # Dynamically calculate flattened size after conv + pooling
41
+ self.flattened_size = self._get_flattened_size(input_channels, input_length)
42
+
43
+ self.classifier = nn.Sequential(
44
+ nn.Linear(self.flattened_size, 256),
45
+ nn.ReLU(),
46
+ nn.Linear(256, 128),
47
+ nn.ReLU(),
48
+ nn.Linear(128, 2) # Binary output
49
+ )
50
+
51
+ def _get_flattened_size(self,input_channels, input_length):
52
+ with torch.no_grad():
53
+ dummy_input = torch.zeros(1, input_channels, input_length)
54
+ out = self.conv_block(dummy_input)
55
+ return out.view(1, -1).shape[1]
56
+
57
+ def forward(self, x):
58
+ """
59
+ Defines the forward pass of the Figure2CNN model.
60
+
61
+ Args:
62
+ x (torch.Tensor): Input tensor of shape (batch_size, channels, input_length).
63
+
64
+ Returns:
65
+ torch.Tensor: Output tensor containing class scores.
66
+ """
67
+ x = self.conv_block(x)
68
+ x = x.view(x.size(0), -1) # Flatten
69
+ return self.classifier(x)
70
+
71
+ def describe_model(self):
72
+ """Print architecture and flattened size (for debug)."""
73
+ print("\nModel Summary:")
74
+ print(" - Conv Block: 4 layers")
75
+ print(f" - Flattened size: {self.flattened_size} after conv/pool")
76
+ print(f" - Classifier: {self.classifier}\n")
77
+
models/resnet_cnn.py ADDED
@@ -0,0 +1,70 @@
1
+ """
2
+ 📌 MODEL DESIGNATION:
3
+ ResNet1D is validated ONLY for RAMAN spectra input.
4
+ Any use for FTIR modeling is invalid and deprecated.
5
+ See milestone: @figure2cnn-raman-only-milestone
6
+ """
7
+ import torch
8
+ import torch.nn as nn
9
+
10
+
11
+ class ResidualBlock1D(nn.Module):
12
+ """
13
+ Basic 1-D residual block:
14
+ Conv1d -> ReLU -> Conv1d (+ skip connection).
15
+ If channel count changes, a 1x1 Conv aligns the skip path.
16
+ """
17
+
18
+ def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3):
19
+ super().__init__()
20
+ padding = kernel_size // 2
21
+
22
+ self.conv1 = nn.Conv1d(in_channels, out_channels,
23
+ kernel_size, padding=padding)
24
+ self.relu = nn.ReLU(inplace=True)
25
+ self.conv2 = nn.Conv1d(out_channels, out_channels,
26
+ kernel_size, padding=padding)
27
+
28
+ self.skip = (
29
+ nn.Identity()
30
+ if in_channels == out_channels
31
+ else nn.Conv1d(in_channels, out_channels, kernel_size=1)
32
+ )
33
+
34
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
35
+ identity = self.skip(x)
36
+ out = self.relu(self.conv1(x))
37
+ out = self.conv2(out)
38
+ return self.relu(out + identity)
39
+
40
+ def describe_model(self):
41
+ """Print architecture and flattened size (for debug). """
42
+ print(r"\n Model Summary:")
43
+ print(r" - Conv Block: 4 Layers")
44
+ print(f" - Input length: {self.flattened_size} after conv/pool")
45
+ print(f" - Classifier: {self.classifier}\n")
46
+
47
+
48
+ class ResNet1D(nn.Module):
49
+ """
50
+ Lightweight 1-D ResNet for Raman spectra (length 500, single channel).
51
+ """
52
+
53
+ def __init__(self, input_length: int = 500, num_classes: int = 2):
54
+ super().__init__()
55
+
56
+ # Three residual stages
57
+ self.stage1 = ResidualBlock1D(1, 16)
58
+ self.stage2 = ResidualBlock1D(16, 32)
59
+ self.stage3 = ResidualBlock1D(32, 64)
60
+
61
+ # Global aggregation + classifier
62
+ self.global_pool = nn.AdaptiveAvgPool1d(1) # -> [B, 64, 1]
63
+ self.fc = nn.Linear(64, num_classes)
64
+
65
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
66
+ x = self.stage1(x)
67
+ x = self.stage2(x)
68
+ x = self.stage3(x)
69
+ x = self.global_pool(x).squeeze(-1) # -> [B, 64]
70
+ return self.fc(x)
outputs/resnet_model.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f1d1b5541ade480077eeae8c627b8e2372076cc52f0be4e69a3b063895653a9
3
+ size 114450
scripts/__init__.py ADDED
File without changes
scripts/discover_raman_files.py ADDED
@@ -0,0 +1,54 @@
1
+ import sys
2
+ import os
3
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
4
+
5
+
6
+ def list_txt_files(root_dir):
7
+ """Recursively lists all .txt files in a directory."""
8
+ txt_files = []
9
+ for dirpath, _, filenames in os.walk(root_dir):
10
+ for file in filenames:
11
+ if file.endswith(".txt"):
12
+ full_path = os.path.join(dirpath, file)
13
+ txt_files.append(full_path)
14
+ return txt_files
15
+
16
+
17
+ def label_file(filepath):
18
+ """
19
+ Assigns label based on filename prefix:
20
+ - 'sta-' => 0 (pristine)
21
+ - 'wea-' => 1 (weathered)
22
+ Returns None if prefix is unknown.
23
+ """
24
+ filename = os.path.basename(filepath).lower()
25
+ if filename.startswith("sta-"):
26
+ return 0
27
+ elif filename.startswith("wea-"):
28
+ return 1
29
+ else:
30
+ return None # Unknown or irrelevant
31
+
32
+
33
+ if __name__ == "__main__":
34
+ dataset_dir = os.path.join(
35
+ "datasets", "rdwp",
36
+ "A Raman database of microplastics weathered under natural environments"
37
+ )
38
+
39
+ txt_paths = list_txt_files(dataset_dir)
40
+
41
+ print(f"Found {len(txt_paths)} .txt files.")
42
+ print("Sample Files: ")
43
+ for path in txt_paths[:5]:
44
+ print(" -", path)
45
+
46
+ labeled_files = []
47
+ for path in txt_paths:
48
+ label = label_file(path)
49
+ if label is not None:
50
+ labeled_files.append((path, label))
51
+
52
+ print(f"\nLabeled {len(labeled_files)} files:")
53
+ for path, label in labeled_files[:5]:
54
+ print(f" - {os.path.basename(path)} => Label: {label}")
scripts/list_spectra.py ADDED
@@ -0,0 +1,77 @@
+ """
+ list_spectra.py
+
+ This script provides functionality to recursively list all `.txt` files
+ within a specified directory. It is designed to assist in managing and
+ exploring datasets, particularly for Raman spectrum data stored in text files.
+
+ Functions:
+ - list_txt_files(root_dir): Recursively finds and returns a list of all `.txt`
+   files in the given directory.
+ - label_file(filepath): Assigns a binary label (0 = pristine, 1 = weathered)
+   based on the filename prefix.
+
+ Usage:
+ - The script can be executed directly to list `.txt` files in a predefined
+   dataset directory and print a summary, including the total count and a
+   sample of file paths.
+
+ Example:
+     $ python list_spectra.py
+     Found 100 .txt files.
+     Sample Files:
+     - datasets/rdwp/.../file1.txt
+     - datasets/rdwp/.../file2.txt
+ """
+ import sys
+ import os
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+
+ def list_txt_files(root_dir):
+     """Recursively lists all .txt files in a directory."""
+     txt_files = []
+     for dirpath, _, filenames in os.walk(root_dir):
+         for file in filenames:
+             if file.endswith(".txt"):
+                 full_path = os.path.join(dirpath, file)
+                 txt_files.append(full_path)
+     return txt_files
+
+
+ def label_file(filepath):
+     """
+     Assigns a label based on the filename prefix:
+     - 'sta-' => 0 (pristine)
+     - 'wea-' => 1 (weathered)
+     Returns None if the prefix is unknown.
+     """
+     filename = os.path.basename(filepath).lower()
+     if filename.startswith("sta-"):
+         return 0
+     elif filename.startswith("wea-"):
+         return 1
+     else:
+         return None  # Unknown or irrelevant
+
+
+ if __name__ == "__main__":
+     dataset_dir = os.path.join(
+         "datasets", "rdwp",
+         "A Raman database of microplastics weathered under natural environments"
+     )
+
+     txt_paths = list_txt_files(dataset_dir)
+
+     print(f"Found {len(txt_paths)} .txt files.")
+     print("Sample Files: ")
+     for path in txt_paths[:5]:
+         print(" -", path)
+
+     labeled_files = []
+     for path in txt_paths:
+         label = label_file(path)
+         if label is not None:
+             labeled_files.append((path, label))
+
+     print(f"\nLabeled {len(labeled_files)} files:")
+     for path, label in labeled_files[:5]:
+         print(f" - {os.path.basename(path)} => Label: {label}")
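The `sta-`/`wea-` prefix rule above is the entire labeling scheme for this dataset, so it is worth seeing in isolation. A minimal standalone sketch of the same rule (a self-contained copy, not an import of the script):

```python
import os


def label_file(filepath):
    """Label by filename prefix: 'sta-' -> 0 (pristine), 'wea-' -> 1 (weathered)."""
    filename = os.path.basename(filepath).lower()
    if filename.startswith("sta-"):
        return 0
    if filename.startswith("wea-"):
        return 1
    return None  # unknown prefix: file is skipped downstream


print(label_file("datasets/rdwp/sta-10.txt"))   # -> 0
print(label_file("datasets/rdwp/WEA-100.txt"))  # -> 1 (lowercasing makes it case-insensitive)
print(label_file("datasets/rdwp/readme.txt"))   # -> None
```

Note that because only the basename is inspected, directory names never influence the label.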
scripts/plot_spectrum.py ADDED
@@ -0,0 +1,71 @@
+ """
+ plot_spectrum.py
+
+ This script provides functionality to load and plot Raman spectra from two-column `.txt` files.
+
+ Functions:
+ - load_spectrum(filepath): Reads a spectrum file and extracts Raman shift and intensity values.
+ - plot_spectrum(x, y, title): Plots the Raman spectrum with basic styling.
+
+ Command-line Usage:
+     python plot_spectrum.py --input <path/to/spectrum.txt> [--output <path/to/plot.png>]
+
+ Dependencies:
+ - os: For file path operations.
+ - matplotlib.pyplot: For plotting the spectrum.
+ """
+ import sys
+ import os
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+ import matplotlib.pyplot as plt
+
+
+ def load_spectrum(filepath):
+     """Loads a Raman spectrum from a two-column .txt file."""
+     x_vals, y_vals = [], []
+     with open(filepath, 'r', encoding='utf-8') as file:
+         for line in file:
+             parts = line.strip().split()
+             if len(parts) == 2:
+                 try:
+                     x, y = float(parts[0]), float(parts[1])
+                     x_vals.append(x)
+                     y_vals.append(y)
+                 except ValueError:
+                     continue  # Skip lines that can't be converted
+     return x_vals, y_vals
+
+
+ def plot_spectrum(x, y, title="Raman Spectrum"):
+     """Plots the spectrum data with basic styling."""
+     plt.figure(figsize=(10, 5))
+     plt.plot(x, y, linewidth=1.5)
+     plt.xlabel("Raman Shift (cm⁻¹)")
+     plt.ylabel("Intensity (a.u.)")
+     plt.title(title)
+     plt.grid(True, linestyle='--', alpha=0.6)
+     plt.tight_layout()
+
+
+ if __name__ == "__main__":
+     import argparse
+
+     parser = argparse.ArgumentParser(description="Plot a Raman spectrum from a .txt file.")
+     parser.add_argument("--input", type=str, required=True, help="Path to input .txt file")
+     parser.add_argument("--output", type=str, required=False, help="Path to save .png image")
+
+     args = parser.parse_args()
+     spectrum_file = args.input
+     output_file = args.output
+
+     x, y = load_spectrum(spectrum_file)
+     plot_spectrum(x, y, title=os.path.basename(spectrum_file))
+
+     if output_file:
+         plt.savefig(output_file)
+         print(f"✅ Plot saved to {output_file}")
+     else:
+         plt.show()
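The parser in `load_spectrum` silently drops any row that is not exactly two floats, which is how header lines and corrupt rows are tolerated. A quick standalone sketch of that same parsing rule, operating on in-memory lines rather than a file:

```python
def parse_spectrum_lines(lines):
    """Parse 'shift intensity' pairs, skipping rows that aren't two floats."""
    x_vals, y_vals = [], []
    for line in lines:
        parts = line.strip().split()
        if len(parts) == 2:
            try:
                x, y = float(parts[0]), float(parts[1])
            except ValueError:
                continue  # header row or corrupt data
            x_vals.append(x)
            y_vals.append(y)
    return x_vals, y_vals


raw = ["# Raman shift  intensity", "100.0 12.5", "101.0 13.1", "bad line here"]
x, y = parse_spectrum_lines(raw)
print(x, y)  # -> [100.0, 101.0] [12.5, 13.1]
```

The same tolerance means an entirely malformed file yields empty lists rather than an exception, which is why downstream code checks the point count before using a spectrum.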
scripts/preprocess_dataset.py ADDED
@@ -0,0 +1,121 @@
+ """
+ This script preprocesses a dataset of spectra by resampling and labeling the data.
+
+ Functions:
+ - resample_spectrum(x, y, target_len): Resamples a spectrum to a fixed number of points.
+ - preprocess_dataset(...): Loads, resamples, and applies optional preprocessing steps:
+     - baseline correction
+     - Savitzky-Golay smoothing
+     - min-max normalization
+
+ The script expects the dataset directory to contain text files representing spectra.
+ Each file is:
+     1. Listed using `list_txt_files()`
+     2. Labeled using `label_file()`
+     3. Loaded using `load_spectrum()`
+     4. Resampled and optionally cleaned
+     5. Returned as arrays suitable for ML training
+
+ Dependencies:
+ - numpy
+ - scipy.interpolate, scipy.signal
+ - sklearn.preprocessing
+ - list_spectra (custom)
+ - plot_spectrum (custom)
+ """
+
+ import os
+ import sys
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+ import numpy as np
+ from scipy.interpolate import interp1d
+ from scipy.signal import savgol_filter
+ from sklearn.preprocessing import minmax_scale
+ from scripts.discover_raman_files import list_txt_files, label_file
+ from scripts.plot_spectrum import load_spectrum
+
+ # Default resample target
+ TARGET_LENGTH = 500
+
+
+ # Optional preprocessing steps
+ def remove_baseline(y):
+     """Simple baseline correction using polynomial fitting (order 2)."""
+     x = np.arange(len(y))
+     coeffs = np.polyfit(x, y, deg=2)
+     baseline = np.polyval(coeffs, x)
+     return y - baseline
+
+
+ def normalize_spectrum(y):
+     """Min-max normalization to [0, 1]."""
+     return minmax_scale(y)
+
+
+ def smooth_spectrum(y, window_length=11, polyorder=2):
+     """Apply Savitzky-Golay smoothing."""
+     return savgol_filter(y, window_length, polyorder)
+
+
+ def resample_spectrum(x, y, target_len=TARGET_LENGTH):
+     """Resample a spectrum to a fixed number of points."""
+     f_interp = interp1d(x, y, kind='linear', fill_value='extrapolate')
+     x_uniform = np.linspace(min(x), max(x), target_len)
+     y_uniform = f_interp(x_uniform)
+     return y_uniform
+
+
+ def preprocess_dataset(
+     dataset_dir,
+     target_len=500,
+     baseline_correction=False,
+     apply_smoothing=False,
+     normalize=False
+ ):
+     """
+     Load, resample, and preprocess all valid spectra in the dataset.
+
+     Args:
+         dataset_dir (str): Path to the dataset
+         target_len (int): Number of points to resample to
+         baseline_correction (bool): Whether to apply baseline removal
+         apply_smoothing (bool): Whether to apply Savitzky-Golay smoothing
+         normalize (bool): Whether to apply min-max normalization
+
+     Returns:
+         X (np.ndarray): Preprocessed spectra
+         y (np.ndarray): Corresponding labels
+     """
+     txt_paths = list_txt_files(dataset_dir)
+     X, y_labels = [], []
+
+     for path in txt_paths:
+         label = label_file(path)
+         if label is None:
+             continue
+
+         x_raw, y_raw = load_spectrum(path)
+         if len(x_raw) < 10:
+             continue  # Skip files with too few points
+
+         # Resample
+         y_processed = resample_spectrum(x_raw, y_raw, target_len=target_len)
+
+         # Optional preprocessing
+         if baseline_correction:
+             y_processed = remove_baseline(y_processed)
+         if apply_smoothing:
+             y_processed = smooth_spectrum(y_processed)
+         if normalize:
+             y_processed = normalize_spectrum(y_processed)
+
+         X.append(y_processed)
+         y_labels.append(label)
+
+     return np.array(X), np.array(y_labels)
+
+
+ # Optional: Run directly for testing
+ if __name__ == "__main__":
+     dataset_dir = os.path.join("datasets", "rdwp")
+     X, y = preprocess_dataset(dataset_dir)
+
+     print(f"X shape: {X.shape}")
+     print(f"y shape: {y.shape}")
+     print(f"Label distribution: {np.bincount(y)}")
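Resampling every spectrum onto a fixed-length grid is what lets files with different point counts share one model input size. A minimal sketch of the idea using `np.interp` (the script itself uses `scipy.interpolate.interp1d` with linear interpolation, which behaves the same way inside the data range; `x` is assumed sorted ascending, as Raman shift axes typically are):

```python
import numpy as np


def resample_linear(x, y, target_len=500):
    """Linearly interpolate (x, y) onto a uniform grid of target_len points."""
    x = np.asarray(x, dtype=float)  # must be increasing for np.interp
    y = np.asarray(y, dtype=float)
    x_uniform = np.linspace(x.min(), x.max(), target_len)
    return np.interp(x_uniform, x, y)


x = np.array([100.0, 150.0, 400.0, 900.0])  # irregularly spaced Raman shifts
y = np.array([0.1, 0.5, 0.3, 0.9])
y_500 = resample_linear(x, y, target_len=500)
print(y_500.shape)  # -> (500,)
```

The endpoints of the uniform grid coincide with the original range, so the first and last resampled values match the original first and last intensities.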
scripts/run_inference.py ADDED
@@ -0,0 +1,136 @@
+ import sys
+ import os
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+ import argparse
+ import warnings
+ import logging
+
+ import numpy as np
+ import torch
+
+ from models.figure2_cnn import Figure2CNN
+ from scripts.preprocess_dataset import resample_spectrum, label_file
+
+
+ # =============================================
+ # ✅ Raman-Only Inference Script
+ # This script supports prediction on a single Raman spectrum (.txt file).
+ # FTIR inference has been deprecated and removed for scientific integrity.
+ # See: @raman-pipeline-focus-milestone
+ # =============================================
+
+
+ warnings.filterwarnings(
+     "ignore",
+     message=".*weights_only=False.*",
+     category=FutureWarning
+ )
+
+
+ def load_raman_spectrum(filepath):
+     """Load a 2-column Raman spectrum from a .txt file."""
+     x_vals, y_vals = [], []
+     with open(filepath, 'r', encoding='utf-8') as f:
+         for line in f:
+             parts = line.strip().split()
+             if len(parts) == 2:
+                 try:
+                     x, y = float(parts[0]), float(parts[1])
+                     x_vals.append(x)
+                     y_vals.append(y)
+                 except ValueError:
+                     continue
+     return np.array(x_vals), np.array(y_vals)
+
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(
+         description="Run inference on a single Raman spectrum (.txt file)."
+     )
+     parser.add_argument(
+         "--target-len", type=int, required=True,
+         help="Target length to match model input"
+     )
+     parser.add_argument(
+         "--input", required=True,
+         help="Path to Raman .txt file."
+     )
+     parser.add_argument(
+         "--model", default="random",
+         help="Path to .pth model file, or specify 'random' to use untrained weights."
+     )
+     parser.add_argument(
+         "--output", default=None,
+         help="Where to write the prediction result. If omitted, prints to stdout."
+     )
+     verbosity = parser.add_mutually_exclusive_group()
+     verbosity.add_argument(
+         "--quiet", action="store_true",
+         help="Show only warnings and errors"
+     )
+     verbosity.add_argument(
+         "--verbose", action="store_true",
+         help="Show debug-level logging"
+     )
+
+     args = parser.parse_args()
+
+     # Configure logging
+     level = logging.INFO
+     if args.verbose:
+         level = logging.DEBUG
+     elif args.quiet:
+         level = logging.WARNING
+     logging.basicConfig(level=level, format="%(levelname)s: %(message)s")
+
+     try:
+         # 1. Load & preprocess the Raman spectrum
+         if os.path.isdir(args.input):
+             parser.error(f"Input must be a single Raman .txt file, got a directory: {args.input}")
+
+         x_raw, y_raw = load_raman_spectrum(args.input)
+         if len(x_raw) < 10:
+             parser.error("Spectrum too short for inference.")
+
+         data = resample_spectrum(x_raw, y_raw, target_len=args.target_len)
+         # Shape = (1, 1, target_len) — valid input for Raman inference
+         input_tensor = torch.tensor(data, dtype=torch.float32).unsqueeze(0).unsqueeze(0)
+
+         # 2. Load model
+         model = Figure2CNN(
+             input_length=args.target_len,
+             input_channels=1
+         )
+         if args.model != "random":
+             model.load_state_dict(
+                 torch.load(args.model, map_location="cpu", weights_only=True)
+             )
+         model.eval()
+
+         # 3. Inference
+         with torch.no_grad():
+             logits = model(input_tensor)
+             pred = torch.argmax(logits, dim=1).item()
+
+         # 4. True label (from the filename prefix; None if the prefix is unknown)
+         true_label = label_file(args.input)
+         if true_label is not None:
+             label_str = f"True Label: {true_label}"
+         else:
+             label_str = "True Label: Unknown"
+
+         result = f"Predicted Label: {pred} {label_str}\nRaw Logits: {logits.tolist()}"
+         logging.info(result)
+
+         # 5. Save or print to stdout
+         if args.output:
+             with open(args.output, "w", encoding="utf-8") as fout:
+                 fout.write(result)
+             logging.info("Result saved to %s", args.output)
+
+         sys.exit(0)
+
+     except Exception as e:
+         logging.error(e)
+         sys.exit(1)
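The inference script reshapes the 1-D resampled spectrum into a `(batch, channels, length)` tensor before the forward pass, then takes the argmax over the class logits. Those two shape/decision steps can be sketched with NumPy stand-ins (the script itself uses `torch.tensor(...).unsqueeze(0).unsqueeze(0)` and `torch.argmax`, which follow the same shape rules; the logits below are made-up placeholder values, not model output):

```python
import numpy as np

target_len = 500
spectrum = np.random.rand(target_len).astype(np.float32)  # stand-in for a resampled spectrum

# (length,) -> (1, 1, length): batch of 1, single channel, as a 1-D CNN expects
input_batch = spectrum[np.newaxis, np.newaxis, :]
print(input_batch.shape)  # -> (1, 1, 500)

# Placeholder logits for the 2 classes (0 = pristine, 1 = weathered)
logits = np.array([[0.3, 1.7]])
pred = int(np.argmax(logits, axis=1)[0])
print(pred)  # -> 1
```

Getting this shape wrong is the usual failure mode when `--target-len` does not match the length the model was trained with, which is why the flag is required.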
scripts/train_model.py ADDED
@@ -0,0 +1,157 @@
+ import os, sys, json
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+ from datetime import datetime
+ import argparse, numpy as np, torch
+ from torch.utils.data import TensorDataset, DataLoader
+ from sklearn.model_selection import StratifiedKFold
+ from sklearn.metrics import confusion_matrix
+
+ # Add project-specific imports
+ from scripts.preprocess_dataset import preprocess_dataset
+ from models.figure2_cnn import Figure2CNN
+ from models.resnet_cnn import ResNet1D
+
+ # Argument parser for CLI usage
+ parser = argparse.ArgumentParser(
+     description="Run 10-fold CV on Raman data with optional preprocessing.")
+ parser.add_argument("--target-len", type=int, default=500)
+ parser.add_argument("--baseline", action="store_true")
+ parser.add_argument("--smooth", action="store_true")
+ parser.add_argument("--normalize", action="store_true")
+ parser.add_argument("--batch-size", type=int, default=16)
+ parser.add_argument("--epochs", type=int, default=10)
+ parser.add_argument("--learning-rate", type=float, default=1e-3)
+ parser.add_argument("--model", type=str, default="figure2",
+                     choices=["figure2", "resnet"])
+ args = parser.parse_args()
+
+ # Constants
+ # Raman-only dataset (RDWP)
+ DATASET_PATH = 'datasets/rdwp'
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
+ NUM_FOLDS = 10
+
+ # Ensure output dirs exist
+ os.makedirs("outputs", exist_ok=True)
+ os.makedirs("outputs/logs", exist_ok=True)
+
+ print("Preprocessing Configuration:")
+ print(f"  Resample to     : {args.target_len}")
+ print(f"  Baseline Correct: {'✅' if args.baseline else '❌'}")
+ print(f"  Smoothing       : {'✅' if args.smooth else '❌'}")
+ print(f"  Normalization   : {'✅' if args.normalize else '❌'}")
+
+ # Load + preprocess data
+ print("🔄 Loading and preprocessing data ...")
+ X, y = preprocess_dataset(
+     DATASET_PATH,
+     target_len=args.target_len,
+     baseline_correction=args.baseline,
+     apply_smoothing=args.smooth,
+     normalize=args.normalize
+ )
+ X, y = np.array(X, np.float32), np.array(y, np.int64)
+ print(f"✅ Data Loaded: {X.shape[0]} samples, {X.shape[1]} features each.")
+ print(f"🔍 Using model: {args.model}")
+
+ # Cross-validation
+ skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=42)
+ fold_accuracies = []
+ all_conf_matrices = []
+
+ for fold, (train_idx, val_idx) in enumerate(skf.split(X, y), 1):
+     print(f"\n🔁 Fold {fold}/{NUM_FOLDS}")
+
+     X_train, X_val = X[train_idx], X[val_idx]
+     y_train, y_val = y[train_idx], y[val_idx]
+
+     train_loader = DataLoader(
+         TensorDataset(torch.tensor(X_train), torch.tensor(y_train)),
+         batch_size=args.batch_size, shuffle=True)
+     val_loader = DataLoader(
+         TensorDataset(torch.tensor(X_val), torch.tensor(y_val)),
+         batch_size=args.batch_size)
+
+     # Model selection
+     model = (Figure2CNN if args.model == "figure2" else ResNet1D)(
+         input_length=args.target_len).to(DEVICE)
+
+     optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
+     criterion = torch.nn.CrossEntropyLoss()
+
+     for epoch in range(args.epochs):
+         model.train()
+         running_loss = 0.0
+         for inputs, labels in train_loader:
+             inputs = inputs.unsqueeze(1).to(DEVICE)
+             labels = labels.to(DEVICE)
+
+             optimizer.zero_grad()
+             loss = criterion(model(inputs), labels)
+             loss.backward()
+             optimizer.step()
+             running_loss += loss.item()
+
+     # After the epoch loop, print one summary line per fold:
+     print(f"✅ Fold {fold} done. Final loss: {running_loss:.4f}")
+
+     # Evaluation
+     model.eval()
+     all_true, all_pred = [], []
+     with torch.no_grad():
+         for inputs, labels in val_loader:
+             inputs = inputs.unsqueeze(1).to(DEVICE)
+             labels = labels.to(DEVICE)
+             outputs = model(inputs)
+             _, predicted = torch.max(outputs, 1)
+             all_true.extend(labels.cpu().numpy())
+             all_pred.extend(predicted.cpu().numpy())
+
+     acc = 100 * np.mean(np.array(all_true) == np.array(all_pred))
+     fold_accuracies.append(acc)
+     all_conf_matrices.append(confusion_matrix(all_true, all_pred))
+     print(f"✅ Fold {fold} Accuracy: {acc:.2f}%")
+
+ # Save model checkpoint **after** the final fold
+ model_path = f"outputs/{args.model}_model.pth"
+ torch.save(model.state_dict(), model_path)
+
+ # Summary
+ mean_acc, std_acc = np.mean(fold_accuracies), np.std(fold_accuracies)
+ print("\n📊 Cross-Validation Results:")
+ for i, a in enumerate(fold_accuracies, 1):
+     print(f"Fold {i}: {a:.2f}%")
+ print(f"\n✅ Mean Accuracy: {mean_acc:.2f}% ± {std_acc:.2f}%")
+ print(f"✅ Model saved to {model_path}")
+
+
+ # Save diagnostics
+ def save_diagnostics_log(fold_acc, confs, args_param, output_path):
+     fold_metrics = [{"fold": i + 1, "accuracy": float(a),
+                      "confusion_matrix": c.tolist()}
+                     for i, (a, c) in enumerate(zip(fold_acc, confs))]
+     log = {
+         "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+         "preprocessing": {
+             "target_len": args_param.target_len,
+             "baseline": args_param.baseline,
+             "smooth": args_param.smooth,
+             "normalize": args_param.normalize,
+         },
+         "fold_metrics": fold_metrics,
+         "overall": {
+             "mean_accuracy": float(np.mean(fold_acc)),
+             "std_accuracy": float(np.std(fold_acc)),
+             "num_folds": len(fold_acc),
+             "batch_size": args_param.batch_size,
+             "epochs": args_param.epochs,
+             "learning_rate": args_param.learning_rate,
+             "device": str(DEVICE)
+         }
+     }
+     with open(output_path, "w", encoding="utf-8") as f:
+         json.dump(log, f, indent=2)
+     print(f"🧠 Diagnostics written to {output_path}")
+
+
+ log_path = f"outputs/logs/raman_{args.model}_diagnostics.json"
+ save_diagnostics_log(fold_accuracies, all_conf_matrices, args, log_path)
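One detail worth flagging in the diagnostics logging: `json.dump` rejects NumPy scalar types such as `float32` and `int64`, which is why `save_diagnostics_log` casts the mean/std with `float(...)` and converts confusion matrices via `.tolist()`. A small standalone illustration of the pitfall and the cast (the accuracy values here are made up for the example):

```python
import json
import numpy as np

fold_acc = [np.float32(91.2), np.float32(88.4)]  # placeholder per-fold accuracies

# NumPy scalars are not JSON serializable...
try:
    json.dumps({"accuracies": fold_acc})
except TypeError as e:
    print("serialization failed:", e)

# ...so cast to built-in Python types before dumping
log = {
    "accuracies": [float(a) for a in fold_acc],
    "num_folds": int(np.int64(len(fold_acc))),
}
print(json.dumps(log))
```

The same cast protects against NumPy version differences: even where a particular scalar type happens to serialize, plain `float`/`int` always does.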
validate_pipeline.sh ADDED
@@ -0,0 +1,60 @@
+ #!/usr/bin/env bash
+ # ===========================================
+ # validate_pipeline.sh — Canonical Smoke Test
+ # AI-Driven Polymer Aging Prediction System
+ # Requires: conda (or venv) already installed
+ # ===========================================
+
+ set -euo pipefail
+ RED='\033[0;31m'
+ GRN='\033[0;32m'
+ YLW='\033[1;33m'
+ NC='\033[0m'
+
+ die() {
+     echo -e "${RED}[FAIL] $1${NC}"
+     exit 1
+ }
+ pass() { echo -e "${GRN}[PASS] $1${NC}"; }
+
+ echo -e "${YLW}>>> Activating environment...${NC}"
+ source "$(conda info --base)/etc/profile.d/conda.sh"
+ conda activate polymer_env || die "conda env 'polymer_env' not found"
+
+ root_dir="$(dirname "$(readlink -f "$0")")"
+ cd "$root_dir" || die "repo root not found"
+
+ # ---------- Step 1: Preprocessing ----------
+ echo -e "${YLW}>>> Step 1: Preprocessing${NC}"
+ python scripts/preprocess_dataset.py datasets/rdwp \
+     --target-len 500 --baseline --smooth --normalize |
+     grep -q "X shape:" || die "preprocess_dataset.py failed"
+ pass "Preprocessing"
+
+ # ---------- Step 2: CV Training (Figure2) ----------
+ echo -e "${YLW}>>> Step 2: 10-Fold CV Training${NC}"
+ python scripts/train_model.py \
+     --target-len 500 --baseline --smooth --normalize \
+     --model figure2
+ [[ -f outputs/figure2_model.pth ]] || die "model .pth not found"
+ [[ -f outputs/logs/raman_figure2_diagnostics.json ]] || die "diagnostics JSON not found"
+ pass "Training & artifacts"
+
+ # ---------- Step 3: Inference ----------
+ echo -e "${YLW}>>> Step 3: Inference${NC}"
+ python scripts/run_inference.py \
+     --target-len 500 \
+     --input datasets/rdwp/wea-100.txt \
+     --model outputs/figure2_model.pth \
+     --output outputs/inference/test_prediction.json
+ [[ -f outputs/inference/test_prediction.json ]] || die "inference output missing"
+ pass "Inference"
+
+ # ---------- Step 4: Spectrum Plot ----------
+ echo -e "${YLW}>>> Step 4: Plot Spectrum${NC}"
+ python scripts/plot_spectrum.py --input datasets/rdwp/sta-10.txt || die "plot_spectrum.py failed"
+ pass "Plotting"
+
+
+ echo -e "${GRN}All validation checks passed!${NC}"