Spaces: devjas1 (Sleeping)
Commit e484a46 · committed by devjas1 · 0 parent(s)

Initial migration from original polymer_project
Browse files

- .gitattributes +1 -0
- .gitignore +46 -0
- LICENSE +201 -0
- README.md +175 -0
- app/ui_app.py +347 -0
- backend/.gitignore +1 -0
- backend/inference_utils.py +79 -0
- backend/main.py +34 -0
- docs/BACKEND_MIGRATION_LOG.md +60 -0
- docs/ENVIRONMENT_GUIDE.md +119 -0
- docs/HPC_REMOTE_SETUP.md +111 -0
- docs/LICENSE +21 -0
- docs/PROJECT_TIMELINE.md +156 -0
- docs/REPRODUCIBILITY.md +132 -0
- models/__init__.py +0 -0
- models/figure2_cnn.py +77 -0
- models/resnet_cnn.py +70 -0
- outputs/resnet_model.pth +3 -0
- scripts/__init__.py +0 -0
- scripts/discover_raman_files.py +54 -0
- scripts/list_spectra.py +77 -0
- scripts/plot_spectrum.py +71 -0
- scripts/preprocess_dataset.py +121 -0
- scripts/run_inference.py +136 -0
- scripts/train_model.py +157 -0
- validate_pipeline.sh +60 -0
.gitattributes
ADDED
@@ -0,0 +1 @@
outputs/resnet_model.pth filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,46 @@
# Ignore raw data and system clutter

data
datasets/
depracated_scripts/
__pycache__/
*.pyc
.DS_store
*.zip
*.h5
*.log
*.env
*.yml
*.json
environment.yml
test_env/
_frozen_reference.txt

_venv_docker_test/

.streamlit
logs/
docs/scope_maintenance_log.yaml
depracated_script/ftir_cv_diagnostics_run1.json
depracated_script/ftir_cv_diagnostics.json
depracated_script/ftir_model.pth
depracated_script/plot_ftir_sample.py
depracated_script/preprocess_ftir_legacy.py
depracated_script/preprocess_ftir.py
depracated_script/train_ftir_model_cv.py
depracated_script/train_ftir_model.py
depracated_script/train_model.py
depracated_script/cnn_model.py
models/cnn_model.py
outputs\inference\test_prediction.json
outputs\figure2_model.pth
outputs\resnet_model.pth
outputs\saliency
outputs/plots/04_raman_diagnostics.ipynb
outputs/figure2_model.pth
outputs/inference/test_prediction.json
docs/PROJECT_REPORT.md
wea-*.txt
sta-*.txt
scripts/generate_saliency.py
scripts/compare_samples.py
LICENSE
ADDED
@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
README.md
ADDED
@@ -0,0 +1,175 @@

# 🔬 AI-Driven Polymer Aging Prediction and Classification System

[License: MIT](https://opensource.org/licenses/MIT)

A research project developed as part of AIRE 2025. This system applies deep learning to Raman spectral data to classify polymer aging — a critical proxy for recyclability — using a fully reproducible and modular ML pipeline.

---

## 🎯 Project Objective

- Build a validated machine learning system for classifying polymer spectra (predict degradation levels as a proxy for recyclability)
- Compare literature-based and modern CNN architectures (Figure2CNN vs. ResNet1D) on Raman spectral data
- Ensure scientific reproducibility through structured diagnostics and artifact control
- Support sustainability and circular materials research through spectrum-based classification.

---

## 🧠 Model Architectures

| Model | Description |
|------|-------------|
| `Figure2CNN` | Baseline model from literature |
| `ResNet1D` | Deeper candidate model with skip connections |

> Both models support flexible input lengths; Figure2CNN relies on reshape logic, while ResNet1D uses native global pooling.
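The note above is easiest to see in code. The sketch below is an editor's illustration, not the committed `models/resnet_cnn.py` or `models/figure2_cnn.py`; every class and layer name in it is assumed. It shows why a global-average-pooling head accepts any spectrum length, whereas a `Flatten → Linear` head is tied to one fixed length and therefore needs reshape logic.

```python
# Illustrative sketch only (not the committed model code).
import torch
import torch.nn as nn

class GapHead(nn.Module):
    """Small 1D conv backbone + global average pooling: length-agnostic by construction."""
    def __init__(self, n_classes: int = 2):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv1d(1, 16, kernel_size=7, padding=3), nn.ReLU(),
            nn.Conv1d(16, 32, kernel_size=5, padding=2), nn.ReLU(),
        )
        self.pool = nn.AdaptiveAvgPool1d(1)  # collapses any input length to 1
        self.fc = nn.Linear(32, n_classes)

    def forward(self, x):                    # x: (batch, 1, L) for any L
        z = self.pool(self.features(x))      # (batch, 32, 1)
        return self.fc(z.squeeze(-1))        # (batch, n_classes)

# Works for 500-point and 4000-point spectra alike:
for length in (500, 4000):
    print(GapHead()(torch.randn(1, 1, length)).shape)  # torch.Size([1, 2])
```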

---

## 📁 Project Structure (Cleaned and Current)

```text
polymer_project/
├── datasets/rdwp        # Raman spectra
├── models/              # Model architectures
├── scripts/             # Training, inference, utilities
├── outputs/             # Artifacts: models, logs, plots
├── docs/                # Documentation & reports
└── environment.yml      # (local) Conda execution environment
```

---

## ✅ Current Status

| Track | Status | Test Accuracy |
|-----------|----------------------|----------------|
| **Raman** | ✅ Active & validated | **87.81% ± 7.59%** |
| **FTIR** | ⏸️ Deferred (modeling only) | N/A |

**Note:** FTIR preprocessing scripts are preserved but inactive. Modeling work is deferred until a suitable architecture is identified.

**Artifacts:**

- `outputs/figure2_model.pth`
- `outputs/resnet_model.pth`
- `outputs/logs/raman_{model}_diagnostics.json`
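The diagnostics artifact listed above can be summarized in a few lines of Python. This is an editor's sketch only; the file's exact schema is not shown in this commit, so the `fold_accuracies` and `confusion_matrix` keys are assumptions.

```python
# Editor's sketch: inspect a diagnostics JSON (key names assumed, not confirmed by this commit).
import json
from pathlib import Path

path = Path("outputs/logs/raman_resnet_diagnostics.json")
if path.exists():
    diag = json.loads(path.read_text())
    accs = diag.get("fold_accuracies", [])
    if accs:
        print(f"{len(accs)}-fold mean accuracy: {sum(accs) / len(accs):.2f}%")
    print("Confusion matrix:", diag.get("confusion_matrix"))
```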

---

## 🔬 Key Features

- ✅ 10-Fold Stratified Cross-Validation
- ✅ CLI Training: `train_model.py`
- ✅ CLI Inference: `run_inference.py`
- ✅ Output artifact naming per model
- ✅ Raman-only preprocessing with baseline correction, smoothing, normalization
- ✅ Structured diagnostics JSON (accuracies, confusion matrices)
- ✅ Canonical validation script (`validate_pipeline.sh`) confirms reproducibility of all core components

---

## 🔀 Branching Strategy

| Branch | Purpose |
|--------|--------|
| `main` | Local development (CPU) |
| `hpc_main` | Cluster-ready (HPC; GPU) |

**Environments:**

```bash

# Local
git checkout main
conda env create -f environment.yml
conda activate polymer_env

# HPC
git checkout hpc-main
conda env create -f environment_hpc.yml
conda activate polymer_env
```

## 📊 Sample Training & Inference

### Training (10-Fold CV)

```bash

python scripts/train_model.py --model resnet --target-len 4000 --baseline --smooth --normalize
```

### Inference (Raman)

```bash

python scripts/run_inference.py --target-len 4000 \
  --input datasets/rdwp/sample123.txt --model outputs/resnet_model.pth \
  --output outputs/inference/prediction.txt
```

### Inference Output Example

```bash
Predicted Label: 1 True Label: 1
Raw Logits: [[-569.544, 427.996]]
```

### Validation Script (Raman Pipeline)

```bash
./validate_pipeline.sh
# Runs preprocessing, training, inference, and plotting checks
# Confirms artifact integrity and logs test results
```

---

## 📚 Dataset Resources

| Type | Dataset | Source |
|-------|---------|--------|
| Raman | RDWP | [A Raman database of microplastics weathered under natural environments](https://data.mendeley.com/datasets/kpygrf9fg6/1) |

Datasets should be downloaded separately and placed here:

```bash
datasets/
└── rdwp/
    ├── sample1.txt
    ├── sample2.txt
    └── ...
```

These files are intentionally excluded from version control via `.gitignore`
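A quick way to confirm the layout above before training is sketched below. This is an editor's illustration, not a project script; it assumes the `sta*`/`wea*` filename convention used by `app/ui_app.py`'s `label_file()` and reflected in the `.gitignore` patterns.

```python
# Editor's sketch: sanity-check the expected datasets/rdwp layout.
from pathlib import Path

rdwp = Path("datasets/rdwp")
files = sorted(rdwp.glob("*.txt"))
stable = [f for f in files if f.name.lower().startswith("sta")]
weathered = [f for f in files if f.name.lower().startswith("wea")]
print(f"{len(files)} spectra found ({len(stable)} stable, {len(weathered)} weathered)")
```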

---

## 🛠 Dependencies

- `Python 3.10+`
- `Conda, Git`
- `PyTorch (CPU & CUDA)`
- `Numpy, SciPy, Pandas`
- `Scikit-learn`
- `Matplotlib, Seaborn`
- `ArgParse, JSON`

---

## 🧑🤝🧑 Contributors

- **Jaser H.** — AIRE 2025 Intern, Developer
- **Dr. Kuppannagari** — Research Mentor

---

## 🚧 Next Steps

- 🔍 Review diagnostics logs and summarize results in reports
- 🔬 Conduct small-scale hyperparameter sweeps
- 📈 Visual tools and presentation assets for showcase-ready delivery
- 🪪 Prepare presentation-ready visuals and model cards for final reporting
- ✅ Canonical validation completed (`@validation-loop-complete`)
app/ui_app.py
ADDED
@@ -0,0 +1,347 @@
import os
import sys

# Project base path
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.append(BASE_DIR)

from models.figure2_cnn import Figure2CNN
from models.resnet_cnn import ResNet1D
from scripts.preprocess_dataset import resample_spectrum

from io import StringIO
from glob import glob
from pathlib import Path
import numpy as np
import streamlit as st
import torch
import matplotlib.pyplot as plt


# Label map and label extractor
label_map = {0: "Stable (Unweathered)", 1: "Weathered (Degraded)"}

def label_file(filename: str) -> int:
    name = Path(filename).name.lower()
    if name.startswith("sta"):
        return 0
    elif name.startswith("wea"):
        return 1
    else:
        raise ValueError("Unknown label pattern")

# Page configuration
st.set_page_config(
    page_title="Polymer Aging Inference",
    initial_sidebar_state="collapsed",
    page_icon="🔬",
    layout="wide")


# Reset status if nothing is uploaded
if 'uploaded_file' not in st.session_state:
    st.session_state.status_message = "Awaiting input..."
    st.session_state.status_type = "info"

# Title and caption
st.markdown("**🧪 Raman Spectrum Classifier**")
st.caption("AI-driven classification of polymer degradation using Raman spectroscopy.")

# Sidebar
with st.sidebar:
    st.header("ℹ️ About This App")
    st.markdown("""
    Part of the **AIRE 2025 Internship Project**:
    `AI-Driven Polymer Aging Prediction and Classification`

    Uses Raman spectra and deep learning to predict material degradation.

    **Author**: Jaser Hasan
    **Mentor**: Dr. Sanmukh Kuppannagari
    [🔗 GitHub](https://github.com/dev-jaser/ai-ml-polymer-aging-prediction)
    """)

# Metadata for visual badges and metrics
model_metadata = {
    "Figure2CNN (Baseline)": {
        "emoji": "🔬",
        "description": "Baseline CNN with standard filters",
        "accuracy": "94.80%",
        "f1": "94.30%"
    },
    "ResNet1D (Advanced)": {
        "emoji": "🧠",
        "description": "Residual CNN with deeper feature learning",
        "accuracy": "96.20%",
        "f1": "95.90%"
    }
}

model_config = {
    "Figure2CNN (Baseline)": {
        "model_class": Figure2CNN,
        "model_path": "outputs/figure2_model.pth"
    },
    "ResNet1D (Advanced)": {
        "model_class": ResNet1D,
        "model_path": "outputs/resnet_model.pth"
    }
}

col1, col2 = st.columns([1.1, 2], gap="large")  # optional for cleaner spacing

try:
    with col1:
        # 📊 Upload + Model Selection
        st.markdown("**📁 Upload Spectrum**")

        # [NEW POSITION] 🧠 Model Selection grounded near data input
        with st.container():
            st.markdown("**🧠 Model Selection**")
            # Enhanced model selector
            model_labels = [
                f"{model_metadata[name]['emoji']} {name}" for name in model_config.keys()
            ]
            selected_label = st.selectbox(
                "Choose model architecture:",
                model_labels,
                key="model_selector"
            )
            model_choice = selected_label.split(" ", 1)[1]
        with st.container():
            meta = model_metadata[model_choice]
            st.markdown(f"""
            **📈 Model Overview**
            *{meta['description']}*

            - **Accuracy**: `{meta['accuracy']}`
            - **F1 Score**: `{meta['f1']}`
            """)


        # Model path & check
        # [PATCH] Use selected model config
        MODEL_PATH = model_config[model_choice]["model_path"]
        MODEL_EXISTS = Path(MODEL_PATH).exists()
        TARGET_LEN = 500

        if not MODEL_EXISTS:
            st.error("🚫 Model file not found. Please train the model first.")
        tab1, tab2 = st.tabs(["Upload File", "Use Sample"])
        with tab1:
            uploaded_file = st.file_uploader("Upload Raman `.txt` spectrum", type="txt")
        with tab2:
            sample_files = sorted(glob("app/sample_spectra/*.txt"))
            sample_options = ["-- Select --"] + sample_files
            selected_sample = st.selectbox("Choose a sample:", sample_options)
            if selected_sample != "-- Select --":
                with open(selected_sample, "r", encoding="utf-8") as f:
                    file_contents = f.read()
                uploaded_file = StringIO(file_contents)
                uploaded_file.name = os.path.basename(selected_sample)

        # Capture file in session
        if uploaded_file is not None:
            st.session_state['uploaded_file'] = uploaded_file
            st.session_state['filename'] = uploaded_file.name
            st.session_state.status_message = f"📁 File `{uploaded_file.name}` loaded. Ready to infer."
            st.session_state.status_type = "success"
            st.session_state.inference_run_once = False

        # Status banner
        st.markdown("**🚦 Pipeline Status**")
        status_msg = st.session_state.get("status_message", "Awaiting input...")
        status_typ = st.session_state.get("status_type", "info")
        if status_typ == "success":
            st.success(status_msg)
        elif status_typ == "error":
            st.error(status_msg)
        else:
            st.info(status_msg)

        # Inference trigger
        if st.button("▶️ Run Inference") and 'uploaded_file' in st.session_state and MODEL_EXISTS:
            spectrum_name = st.session_state['filename']
            uploaded_file = st.session_state['uploaded_file']
            uploaded_file.seek(0)
            raw_data = uploaded_file.read()
            raw_text = raw_data.decode("utf-8") if isinstance(raw_data, bytes) else raw_data

            # Parse spectrum
            x_vals, y_vals = [], []
            for line in raw_text.splitlines():
                parts = line.strip().replace(",", " ").split()
                numbers = [p for p in parts if p.replace('.', '', 1).replace('-', '', 1).isdigit()]
                if len(numbers) >= 2:
                    try:
                        x, y = float(numbers[0]), float(numbers[1])
                        x_vals.append(x)
                        y_vals.append(y)
                    except ValueError:
                        continue

            x_raw = np.array(x_vals)
            y_raw = np.array(y_vals)
            y_resampled = resample_spectrum(x_raw, y_raw, TARGET_LEN)
            st.session_state['x_raw'] = x_raw
            st.session_state['y_raw'] = y_raw
            st.session_state['y_resampled'] = y_resampled

            # ---

            # Update banner for inference
            st.session_state.status_message = f"🔍 Inference running on: `{spectrum_name}`"
            st.session_state.status_type = "info"
            st.session_state.inference_run_once = True


    # Inference

    with col2:
        if st.session_state.get("inference_run_once", False):
            # Plot: Raw + Resampled
            x_raw = st.session_state.get("x_raw", None)
            y_raw = st.session_state.get("y_raw", None)
            y_resampled = st.session_state.get("y_resampled", None)
            if x_raw is not None and y_raw is not None and y_resampled is not None:
                st.subheader("📉 Spectrum Overview")
                st.write("")  # Spacer line for visual breathing room
                from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
                from PIL import Image
                import io

                # Create smaller figure
                fig, ax = plt.subplots(1, 2, figsize=(8, 2.5), dpi=150)
                ax[0].plot(x_raw, y_raw, label="Raw", color="dimgray")
                ax[0].set_title("Raw Input")
                ax[0].set_xlabel("Wavenumber")
                ax[0].set_ylabel("Intensity")
                ax[0].legend()

                ax[1].plot(np.linspace(min(x_raw), max(x_raw), TARGET_LEN), y_resampled, label="Resampled", color="steelblue")
                ax[1].set_title("Resampled")
                ax[1].set_xlabel("Wavenumber")
                ax[1].set_ylabel("Intensity")
                ax[1].legend()

                plt.tight_layout()

                # Render to image buffer
                canvas = FigureCanvas(fig)
                buf = io.BytesIO()
                canvas.print_png(buf)
                buf.seek(0)

                # Display fixed-size image
                st.image(Image.open(buf), caption="Raw vs. Resampled Spectrum", width=880)


                st.session_state['x_raw'] = x_raw
                st.session_state['y_raw'] = y_raw

            y_resampled = st.session_state.get('y_resampled', None)
            if y_resampled is None:
                st.error("❌ Error: Missing resampled spectrum. Please upload and run inference.")
                st.stop()
            input_tensor = torch.tensor(y_resampled, dtype=torch.float32).unsqueeze(0).unsqueeze(0)
            # [PATCH] Load selected model
            ModelClass = model_config[model_choice]["model_class"]
            model = ModelClass(input_length=TARGET_LEN)

            model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu"), strict=False)
            model.eval()
            with torch.no_grad():
                logits = model(input_tensor)
                prediction = torch.argmax(logits, dim=1).item()
                logits_list = logits.numpy().tolist()[0]
            try:
                true_label_idx = label_file(spectrum_name)
                true_label_str = label_map[true_label_idx]
            except Exception:
                true_label_idx = None
                true_label_str = "Unknown"
            predicted_class = label_map.get(prediction, f"Class {prediction}")

            import torch.nn.functional as F
            probs = F.softmax(torch.tensor(logits_list), dim=0).numpy()


            # 🔬 Redesigned Prediction Block – Distinguishing Model vs Classification
            tab_summary, tab_logits, tab_system, tab_explainer = st.tabs([
                "🧠 Model Summary", "🔬 Logits", "⚙️ System Info", "📘 Explanation"])


            with tab_summary:
                st.markdown("### 🧠 AI Model Decision Summary")
                st.markdown(f"""
                **📃 File Analyzed:** `{spectrum_name}`

                **🛠️ Model Chosen:** `{model_choice}`
                """)
                st.markdown("**🔍 Internal Model Prediction**")
                st.write(f"The model believes this sample best matches: **`{predicted_class}`**")
                if true_label_idx is not None:
                    st.caption(f"Ground Truth Label: `{true_label_str}`")

                logit_margin = abs(logits_list[0] - logits_list[1])
                if logit_margin > 1000:
                    strength_desc = "VERY STRONG"
                elif logit_margin > 250:
                    strength_desc = "STRONG"
                elif logit_margin > 100:
                    strength_desc = "MODERATE"
                else:
                    strength_desc = "UNCERTAIN"

                st.markdown("🧪 Final Classification")
                st.markdown("**📊 Model Confidence Estimate**")
                st.write(f"**Decision Confidence:** `{strength_desc}` (margin = `{logit_margin:.1f}`)")
                st.success(f"This spectrum is classified as: **`{predicted_class}`**")

            with tab_logits:
                st.markdown("🔬 View Internal Model Output (Logits)")
                st.markdown("""
                These are the **raw output scores** from the model before making a final prediction.

                Higher scores indicate stronger alignment between the input spectrum and that class.
                """)
                st.json({
                    label_map.get(i, f"Class {i}"): float(score)
                    for i, score in enumerate(logits_list)
                })

            with tab_system:
                st.markdown("⚙️ View System Info")
                st.json({
                    "Model Chosen": model_choice,
                    "Spectrum Length": TARGET_LEN,
                    "Processing Steps": "Raw Signal → Resampled → Inference"
                })

            with tab_explainer:
                st.markdown("📘 What Just Happened?")
                st.markdown("""
                **🔍 Process Overview**
                1. 🗂 A Raman spectrum was uploaded
                2. 📏 Data was standardized
                3. 🤖 AI model analyzed the spectrum
                4. 📌 A classification was made

                ---
                **🧠 How the Model Operates**

                Trained on known polymer conditions, the system detects spectral patterns
                indicative of stable or weathered polymers.

                ---
                **✅ Why It Matters**

                Enables:
                - 🔬 Material longevity research
                - 🔁 Recycling assessments
                - 🌱 Sustainability decisions
                """)

except (ValueError, TypeError, RuntimeError) as e:
    st.error(f"❌ Inference error: {e}")
backend/.gitignore
ADDED
@@ -0,0 +1 @@
__pycache__/
backend/inference_utils.py
ADDED
@@ -0,0 +1,79 @@
def load_model(name):
    return "mock_model"

def run_inference(model, spectrum):
    return {
        "prediction": "Stubbed Output",
        "class_index": 0,
        "logits": [0.0, 1.0],
        "class_labels": ["Stub", "Output"]
    }


# ---------- ACTUAL MODEL LOADING/INFERENCE CODE ---------------------|
# import torch
# import numpy as np
# from pathlib import Path
# from scripts.preprocess_dataset import resample_spectrum
# from models.figure2_cnn import Figure2CNN
# from models.resnet_cnn import ResNet1D

# # -- Label Map --
# LABELS = ["Stable (Unweathered)", "Weathered (Degraded)"]

# # -- Model Paths --
# MODEL_CONFIG = {
#     "figure2": {
#         "class": Figure2CNN,
#         "path": "outputs/figure2_model.pth"
#     },
#     "resnet": {
#         "class": ResNet1D,
#         "path": "outputs/resnet_model.pth"
#     }
# }

# def load_model(model_name: str):
#     if model_name not in MODEL_CONFIG:
#         raise ValueError(f"Unknown model '{model_name}'. Valid options: {list(MODEL_CONFIG.keys())}")

#     config = MODEL_CONFIG[model_name]
#     model = config["class"]()
#     state_dict = torch.load(config["path"], map_location=torch.device("cpu"), weights_only=True)
#     model.load_state_dict(state_dict)
#     model.eval()
#     return model

# def run_inference(model, spectrum: list):
#     # -- Validate Input --
#     if not isinstance(spectrum, list) or len(spectrum) < 10:
#         raise ValueError("Spectrum must be a list of floats with reasonable length")

#     # -- Convert to Numpy --
#     spectrum = np.array(spectrum, dtype=np.float32)

#     # -- Resample --
#     x_vals = np.arange(len(spectrum))
#     spectrum = resample_spectrum(x_vals, spectrum, target_len=500)

#     # -- Normalize --
#     mean = np.mean(spectrum)
#     std = np.std(spectrum)
#     if std == 0:
#         raise ValueError("Standard deviation of spectrum is zero; normalization will fail.")
#     spectrum = (spectrum - mean) / std

#     # -- To Tensor --
#     x = torch.tensor(spectrum, dtype=torch.float32).unsqueeze(0).unsqueeze(0)  # Shape (1, 1, 500)

#     with torch.no_grad():
#         logits = model(x)
#         pred_index = torch.argmax(logits, dim=1).item()

#     return {
#         "prediction": LABELS[pred_index],
#         "class_index": pred_index,
#         "logits": logits.squeeze().tolist(),
#         "class_labels": LABELS
#     }
# ---------- ACTUAL MODEL LOADING/INFERENCE CODE ---------------------|
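For reference, the temporary stubs at the top of this file can be called directly; the short editor's sketch below (not part of the committed file) shows the canned output they return when run from the repository root.

```python
# Editor's sketch: exercising the temporary stubs in backend/inference_utils.py.
from backend.inference_utils import load_model, run_inference

model = load_model("resnet")                    # returns the "mock_model" placeholder
result = run_inference(model, [0.1, 0.2, 0.3])  # canned response; the input is ignored
print(result["prediction"], result["logits"])   # Stubbed Output [0.0, 1.0]
```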
backend/main.py
ADDED
@@ -0,0 +1,34 @@
# from fastapi import FastAPI, HTTPException
from fastapi import FastAPI
from pydantic import BaseModel
# import torch

# from backend.inference_utils import load_model, run_inference

# -- FastAPI app --
app = FastAPI()

# -- Input Schema --
class InferenceRequest(BaseModel):
    model_name: str
    spectrum: list[float]

@app.get("/")
def root():
    return {"message": "Polymer Aging Inference API is online"}

@app.post("/infer")
def infer(request: InferenceRequest):
    return {
        "prediction": "Stubbed Output",
        "class_index": 0,
        "logits": [0.0, 1.0],
        "class_labels": ["Stub", "Output"],
    }
# def infer(request: InferenceRequest):
#     try:
#         model = load_model(request.model_name)
#         result = run_inference(model, request.spectrum)
#         return result
#     except Exception as e:
#         raise HTTPException(status_code=500, detail=str(e)) from e
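As an editor's illustration of the request/response contract defined by `InferenceRequest` and the stubbed `/infer` route above (not part of the committed files), the endpoint can be exercised in-process with FastAPI's `TestClient`. This assumes the `httpx` package is installed and the snippet is run from the repository root.

```python
# Editor's sketch: in-process call to the stubbed /infer endpoint.
from fastapi.testclient import TestClient

from backend.main import app

client = TestClient(app)
payload = {"model_name": "resnet", "spectrum": [0.1, 0.2, 0.3, 0.4, 0.5]}
response = client.post("/infer", json=payload)
print(response.status_code)  # 200
print(response.json())       # {"prediction": "Stubbed Output", ...}
```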
docs/BACKEND_MIGRATION_LOG.md
ADDED
@@ -0,0 +1,60 @@
# BACKEND_MIGRATION_LOG.md

## 📌 Overview

This document tracks the migration of the inference logic from a monolithic Streamlit app to a modular, testable FastAPI backend for the Polymer AI Aging Prediction System.

---

## ✅ Completed Work

### 1. Initial Setup

- Installed `fastapi`, `uvicorn`, and set up basic FastAPI app in `main.py`.

### 2. Modular Inference Utilities

- Moved `load_model()` and `run_inference()` into `backend/inference_utils.py`.
- Separated model configuration for Figure2CNN and ResNet1D.
- Applied proper preprocessing (resampling, normalization) inside `run_inference()`.

### 3. API Endpoint

- `/infer` route accepts JSON payloads with `model_name` and `spectrum`.
- Returns: full prediction dictionary with class index, logits, and label map.

### 4. Validation + Testing

- Tested manually in Python REPL.
- Tested via `curl`:

```bash
curl -X POST -H "Content-Type: application/json" \
  -d @backend/test_payload.json http://127.0.0.1:8000/infer
# assumes the API is running locally (e.g. `uvicorn backend.main:app` on the default port 8000)
```

---

## 🛠 Fixes & Breakpoints Resolved

- ✅ Fixed incorrect model path ("models/" → "outputs/")
- ✅ Corrected unpacking bug in `main.py` → now returns full result dict
- ✅ Replaced invalid `tolist()` call on string-typed logits
- ✅ Manually verified output from CLI and curl

---

## 🧪 Next Focus: Robustness Testing

- Invalid `model_name` handling
- Short/empty spectrum validation
- ResNet model loading test
- JSON schema validation for input
- Unit tests via `pytest` or integration test runner

---

## 🔄 Future Enhancements

- Modular model registry (for adding more model classes easily)
- Add OpenAPI schema and example payloads for documentation
- Enable batch inference or upload support
docs/ENVIRONMENT_GUIDE.md
ADDED
@@ -0,0 +1,119 @@
# 🔧 Environment Management Guide

## AI-Driven Polymer Aging Prediction and Classification System

**Maintainer:** Jaser Hasan
**Snapshot:** `@artifact-isolation-complete`
**Last Updated:** 2025-06-26
**Environments:** Conda (local) + venv on `/scratch` (HPC)

---

## 🧠 Overview

This guide describes how to set up and activate the Python environments required to run the Raman pipeline on both:

- **Local systems** (Mac/Windows/Linux)
- **CWRU Pioneer HPC** (GPU nodes, venv based)

It documents the environment structure and the divergence between the **local Conda environment (`polymer_env`)** and the **HPC Python virtual environment (`polymer_venv`)**.

---

## 📁 Environment Overview

| Platform | Environment | Manager | Path | Notes |
|----------|-------------|---------|------|-------|
| Local (dev) | `polymer_env` | **Conda** | `~/miniconda3/envs/polymer_env` | Primary for day-to-day development |
| HPC (Pioneer) | `polymer_venv` | **venv** (Python stdlib) | `/scratch/users/<case_id>/polymer_project/polymer_venv` | Created under `/scratch` to avoid `/home` quota limits |

---

## 💻 Local Installation (Conda)

```bash

git clone https://github.com/dev-jaser/ai-ml-polymer-aging-prediction.git
cd polymer_project
conda env create -f environment.yml
conda activate polymer_env
python -c "import torch, sys; print('PyTorch:', torch.__version__, 'Python', sys.version)"
```

> **Tip:** Keep Conda updated (`conda update conda`) to reduce solver issues.

---

## 🚀 CWRU Pioneer HPC Setup (venv + pip)

> Conda is intentionally **not** used on Pioneer due to prior codec and disk-quota issues.

### 1. Load Python Module

```bash

module purge
module load Python/3.12.3-GCCcore-13.2.0
```

### 2. Create Working Directory in `/scratch`

```bash

mkdir -p /scratch/users/<case_id>/polymer_project_runtime
cd /scratch/users/<case_id>/polymer_project_runtime
git clone https://github.com/dev-jaser/ai-ml-polymer-aging-prediction.git
```

### 3. Create & Activate Virtual Environment

```bash

python3 -m venv polymer_venv
source polymer_venv/bin/activate
```

### 4. Install Dependencies

```bash

pip install --upgrade pip
pip install -r environment_hpc.yml # Optimized dependencies list for Pioneer
```

(Optional) Save a reproducible freeze:

```bash

pip freeze > requirements_hpc.txt
```

---

## ✅ Supported CLI Workflows (Raman-only)

| Script | Purpose |
|--------|---------|
| `scripts/train_model.py` | 10-fold CV training (`--model figure2` or `resnet`) |
| `scripts/run_inference.py` | Predict single Raman spectrum |
| `scripts/preprocess_dataset.py` | Apply full preprocessing chain |
| `scripts/plot_spectrum.py` | Quick spectrum visualization (.png) |

> FTIR-related scripts are archived and *not installed* into the active environments.

---

## 🔁 Cross-Environment Parity

- Package sets in `environment.yml` and `environment_hpc.yml` are aligned.
- Diagnostics JSON structure and checkpoint filenames are identical on both systems.
- Training commands are copy-paste compatible between the local shell and the HPC login shell.

---

## 📦 Best Practices

- **Local:** use Conda for rapid iteration, notebook work, and small CPU inference.
- **HPC:** use venv in `/scratch` for GPU training; never install large packages into `/home` (`~/`).
- Keep environments lightweight; remove unused libraries to minimize rebuild time.
- Update this guide if either environment definition changes.
docs/HPC_REMOTE_SETUP.md
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Accessing CWRU Pioneer HPC System Remotely via SSH (PuTTY)
|
| 2 |
+
|
| 3 |
+
## Step 1: Set up DUO Authentication for VPN Access
|
| 4 |
+
|
| 5 |
+
### 1. Enroll in DUO (if not already done):
|
| 6 |
+
|
| 7 |
+
> - Go to [case.edu/utech/duo](https://case.edu/utech/duo) and follow instructions to register your device (phone/tablet/hardward token)
|
| 8 |
+
> - This is required for FortiClient VPN authentication.
|
| 9 |
+
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
## Step 2: Install and Configure FortiClient VPN
|
| 13 |
+
|
| 14 |
+
### 1. Download FortiClient VPN:
|
| 15 |
+
|
| 16 |
+
- Visit [case.edu/utech/help/forticlient-vpn](https://case.edu/utech/help/forticlient-vpn)
|
| 17 |
+
- Download the **FortiClient VPN** software for your specific device.
|
| 18 |
+
|
| 19 |
+
### 2. Install & Configure VPN
|
| 20 |
+
|
| 21 |
+
- Run the installer and complete setup
|
| 22 |
+
- Open FortiClient and configure new connection:
|
| 23 |
+
- **Connection Name**: `CWRU VPN` (or any name)
|
| 24 |
+
- **Remote Gateway**: `vpn.case.edu`
|
| 25 |
+
- **Customize Port**: `443`
|
| 26 |
+
- Enable "**Save Credentials**" (optional)
|
| 27 |
+
- Click **Save**
|
| 28 |
+
|
| 29 |
+
### 3. Connect to VPN:
|
| 30 |
+
|
| 31 |
+
- Enter your **CWRU Network ID** (e.g., `jxh369`) and password.
|
| 32 |
+
- Complete **DUO two-factor authentication** when prompted (approve via phone/device)
|
| 33 |
+
- Once connected, you'll see a confirmation message.
|
| 34 |
+
|
| 35 |
+
---
|
| 36 |
+
|
| 37 |
+
## Step 3: Install PuTTY (SSH Client)
|
| 38 |
+
|
| 39 |
+
### 1. Download PuTTY:
|
| 40 |
+
|
| 41 |
+
- If not installed, download from [https://www.putty.org](https://www.putty.org)
|
| 42 |
+
- Run the installer (or use the portable version).
|
| 43 |
+
|
| 44 |
+
## 2. Open PuTTY:
|
| 45 |
+
|
| 46 |
+
- Launch PuTTY from the Start Menu
|
| 47 |
+
|
| 48 |
+
---
|
| 49 |
+
|
| 50 |
+
## Step 4: Configure PuTTY for Pioneer HPC
|
| 51 |
+
|
| 52 |
+
### 1. Enter Connection Details:
|
| 53 |
+
|
| 54 |
+
- **Host Name (or IP address)**: `pioneer.case.edu`
|
| 55 |
+
- **Port**: `22`
|
| 56 |
+
- **Connection Type**: SSH
|
| 57 |
+
|
| 58 |
+
### 2. Optional: Save Session (for future use):
|
| 59 |
+
|
| 60 |
+
- Under "**Saved Sessions**", type `Pioneer HPC` and click **Save**
|
| 61 |
+
|
| 62 |
+
### 3. Click "Open" to initiate the connection
|
| 63 |
+
|
| 64 |
+
---
|
| 65 |
+
|
| 66 |
+
## Step 5: Log In via SSH
|
| 67 |
+
|
| 68 |
+
### 1. Enter Credentials:
|
| 69 |
+
|
| 70 |
+
- When prompted, enter your **CWRU Network ID** (e.g., `jxh369`)
|
| 71 |
+
- Enter your password (same as VPN/CWRU login)
|
| 72 |
+
- Complete DUO authentication again if required
|
| 73 |
+
|
| 74 |
+
### 2. Successful Login:
|
| 75 |
+
|
| 76 |
+
- You should now see the **Pioneer HPC command-line interface**
|
| 77 |
+
|
| 78 |
+
---
|
| 79 |
+
|
| 80 |
+
## Step 6: Disconnecting
|
| 81 |
+
|
| 82 |
+
### 1. Exit SSH Session:
|
| 83 |
+
|
| 84 |
+
- Type `exit` or `logout` in the terminal
|
| 85 |
+
|
| 86 |
+
### 2. Disconnect VPN:
|
| 87 |
+
|
| 88 |
+
- Close PuTTY and disconnect FortiClient VPN when done.
|
| 89 |
+
|
| 90 |
+
---
|
| 91 |
+
|
| 92 |
+
## Troubleshooting Tips
|
| 93 |
+
|
| 94 |
+
### VPN Fails?
|
| 95 |
+
|
| 96 |
+
- Ensure DUO is set up correctly
|
| 97 |
+
- Try reconnecting or restarting FortiClient VPN
|
| 98 |
+
|
| 99 |
+
### PuTTY Connection Refused?
|
| 100 |
+
|
| 101 |
+
- Verify VPN is active (`vpn.case.edu` shows "**Connected**")
|
| 102 |
+
- Check `pioneer.case.edu` and port `22` are correct
|
| 103 |
+
|
| 104 |
+
### DUO Not Prompting?
|
| 105 |
+
|
| 106 |
+
- Ensure your device is registered in DUO
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
## Extra Help on CWRU HPC Systems
|
| 110 |
+
|
| 111 |
+
[https://sites.google.com/a/case.edu/hpcc/](https://sites.google.com/a/case.edu/hpcc/)
|
docs/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 dev-jaser
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
docs/PROJECT_TIMELINE.md
ADDED
|
@@ -0,0 +1,156 @@
|
| 1 |
+
# 📅 PROJECT_TIMELINE.md
|
| 2 |
+
|
| 3 |
+
## AI-Driven Polymer Aging Prediction and Classification System
|
| 4 |
+
|
| 5 |
+
**Intern:** Jaser Hasan
|
| 6 |
+
|
| 7 |
+
### ✅ PHASE 1 – Project Kickoff and Faculty Guidance
|
| 8 |
+
|
| 9 |
+
**Tag:** `@project-init-complete`
|
| 10 |
+
|
| 11 |
+
Received first set of research tasks from Prof. Kuppannagari
|
| 12 |
+
|
| 13 |
+
- Received research plan
|
| 14 |
+
- Objectives defined: download datasets, analyze spectra, implement CNN, run initial inference
|
| 15 |
+
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
### ✅ PHASE 2 – Dataset Acquisition (Local System)
|
| 19 |
+
|
| 20 |
+
**Tag:** `@data-downloaded`
|
| 21 |
+
|
| 22 |
+
- Downloaded Raman `.txt` (RDWP) and FTIR `.csv` data (polymer packaging)
|
| 23 |
+
- Structured into:
|
| 24 |
+
- `datasets/rdwp`
|
| 25 |
+
- `datasets/ftir`
|
| 26 |
+
|
| 27 |
+
---
|
| 28 |
+
|
| 29 |
+
### ✅ PHASE 3 – Data Exploration & Spectral Validation
|
| 30 |
+
|
| 31 |
+
**Tag:** `@data-exploration-complete`
|
| 32 |
+
|
| 33 |
+
- Built plotting tools for Raman and FTIR
|
| 34 |
+
- Validated spectrum structure, removed malformed samples
|
| 35 |
+
- Observed structural inconsistencies in FTIR multi-layer grouping
|
| 36 |
+
|
| 37 |
+
---
|
| 38 |
+
|
| 39 |
+
### ✅ PHASE 4 – Preprocessing Pipeline Implementation
|
| 40 |
+
|
| 41 |
+
**Tag:** `@data-prep`
|
| 42 |
+
|
| 43 |
+
- Implemented `preprocess_dataset.py` for Raman
|
| 44 |
+
- Applied: Resampling -> Baseline correction -> Smoothing -> Normalization
|
| 45 |
+
- Confirmed reproducible input/output behavior and dynamic CLI control
|
| 46 |
+
|
| 47 |
+
### ✅ PHASE 5 – Figure2CNN Architecture Build
|
| 48 |
+
|
| 49 |
+
**Tag:** `@figure2cnn-complete`
|
| 50 |
+
|
| 51 |
+
- Constructed `Figure2CNN`, modeled after the Figure 2 CNN from the referenced research paper
|
| 52 |
+
- `Figure2CNN`: 4 conv layers + 3 FC layers
|
| 53 |
+
- Verified dynamic input length handling (e.g., 500, 1000, 4000)
|
| 54 |
+
|
| 55 |
+
---
|
| 56 |
+
|
| 57 |
+
### ✅ PHASE 6 – Local Training and Inference
|
| 58 |
+
|
| 59 |
+
**Tag:** `@figure2cnn-training-local`
|
| 60 |
+
|
| 61 |
+
- Trained Raman models locally (FTIR now deferred)
|
| 62 |
+
- Canonical Raman accuracy: **87.29% ± 6.30%**
|
| 63 |
+
- FTIR accuracy results archived and excluded from current validation
|
| 64 |
+
- CLI tools for training, inference, plotting implemented
|
| 65 |
+
|
| 66 |
+
---
|
| 67 |
+
|
| 68 |
+
### ✅ PHASE 7 – Reproducibility and Documentation Setup
|
| 69 |
+
|
| 70 |
+
**Tag:** `@project-docs-started`
|
| 71 |
+
|
| 72 |
+
- Authored `README.md`, `PROJECT_REPORT.md`, and `ENVIRONMENT_GUIDE.md`
|
| 73 |
+
- Defined reproducibility guidelines
|
| 74 |
+
- Standardized project structure and versioning
|
| 75 |
+
|
| 76 |
+
---
|
| 77 |
+
|
| 78 |
+
### ✅ PHASE 8 – HPC Access and Venv Strategy
|
| 79 |
+
|
| 80 |
+
**Tag:** `@hpc-login-successful`
|
| 81 |
+
|
| 82 |
+
- Logged into CWRU Pioneer (SSH via PuTTY)
|
| 83 |
+
- Set up FortiClient VPN, which is required to access Pioneer remotely
|
| 84 |
+
- Explored module system; selected venv over Conda for compatibility
|
| 85 |
+
- Loaded Python 3.12.3 + created `polymer_env`
|
| 86 |
+
|
| 87 |
+
---
|
| 88 |
+
|
| 89 |
+
### ✅ PHASE 9 – HPC Environment Sync
|
| 90 |
+
|
| 91 |
+
**Tag:** `@venv-alignment-complete`
|
| 92 |
+
|
| 93 |
+
- Created `environment_hpc.yml`
|
| 94 |
+
- Installed dependencies into `polymer_env`
|
| 95 |
+
- Validated imports, PyTorch installation, and CLI script execution
|
| 96 |
+
|
| 97 |
+
---
|
| 98 |
+
|
| 99 |
+
### ✅ PHASE 10 – Full Instruction Validation on HPC
|
| 100 |
+
|
| 101 |
+
**Tag:** `@prof-k-instruction-validation-complete`
|
| 102 |
+
|
| 103 |
+
- Ran Raman preprocessing and plotting scripts
|
| 104 |
+
- Executed `run_inference.py` with CLI on raw Raman `.txt` file
|
| 105 |
+
- Verified consistent predictions and output logging across local and HPC
|
| 106 |
+
|
| 107 |
+
---
|
| 108 |
+
|
| 109 |
+
### ✅ PHASE 11 – FTIR Path Paused, Raman Declared Primary
|
| 110 |
+
|
| 111 |
+
**Tag:** `@raman-pipeline-focus-milestone`
|
| 112 |
+
|
| 113 |
+
- FTIR modeling formally deferred
|
| 114 |
+
- FTIR preprocessing scripts preserved and archived for future use
|
| 115 |
+
- All resources directed toward Raman pipeline finalization
|
| 116 |
+
- Saliency, FTIR ingestion, and `train_ftir_model.py` archived
|
| 117 |
+
|
| 118 |
+
---
|
| 119 |
+
|
| 120 |
+
### ✅ PHASE 12 – ResNet1D Prototyping & Benchmark Setup
|
| 121 |
+
|
| 122 |
+
**Tag:** `@resnet-prototype-complete`
|
| 123 |
+
|
| 124 |
+
- Built `ResNet1D` architecture in `models/resnet_cnn.py`
|
| 125 |
+
- Integrated `train_model.py` via `--model resnet`
|
| 126 |
+
- Ran initial CV training with successful results
|
| 127 |
+
|
| 128 |
+
---
|
| 129 |
+
|
| 130 |
+
### ✅ PHASE 13 – Output Artifact Isolation
|
| 131 |
+
|
| 132 |
+
**Tag:** `@artifact-isolation-complete`
|
| 133 |
+
|
| 134 |
+
- Patched `train_model.py` to save:
|
| 135 |
+
- `figure2_model.pth`, `resnet_model.pth`
|
| 136 |
+
- `raman_figure2_diagnostics.json`, `raman_resnet_diagnostics.json`
|
| 137 |
+
- Prevented all overwrites by tying output filenames to `args.model`
|
| 138 |
+
- Snapshotted as reproducibility milestone. Enabled downstream validation harness.
|
| 139 |
+
|
| 140 |
+
### ✅ PHASE 14 – Canonical Validation Achieved
|
| 141 |
+
|
| 142 |
+
**Tag:** `@validation-loop-complete`
|
| 143 |
+
|
| 144 |
+
- Created `validate_pipeline.sh` to verify preprocessing, training, inference, plotting
|
| 145 |
+
- Ran full validation using `Figure2CNN` with reproducible CLI config
|
| 146 |
+
- All outputs verified: logs, artifacts, predictions, plots
|
| 147 |
+
- Declared Raman pipeline scientifically validated and stable
|
| 148 |
+
|
| 149 |
+
---
|
| 150 |
+
|
| 151 |
+
### ⏭️ NEXT - Results Analysis & Finalization
|
| 152 |
+
|
| 153 |
+
- Analyze logged diagnostics for both models
|
| 154 |
+
- Conduct optional hyperparameter tuning (batch size, LR)
|
| 155 |
+
- Begin deliverable prep: visuals, posters, cards
|
| 156 |
+
- Resume FTIR work only after the Raman path is fully stabilized and documented, and the open FTIR conceptual error is resolved
|
docs/REPRODUCIBILITY.md
ADDED
|
@@ -0,0 +1,132 @@
|
| 1 |
+
# 📚 REPRODUCIBILITY.md
|
| 2 |
+
|
| 3 |
+
*AI-Driven Polymer Aging Prediction & Classification System*
|
| 4 |
+
*(Canonical Raman-only Pipeline)*
|
| 5 |
+
|
| 6 |
+
> **Purpose**
|
| 7 |
+
> A single document that lets any new user clone the repo, acquire the dataset, recreate the conda environment, and generate the validated Raman pipeline artifacts.
|
| 8 |
+
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
## 1. System Requirements
|
| 12 |
+
|
| 13 |
+
| Component | Minimum Version | Notes |
|
| 14 |
+
|-----------|-----------------|-------|
|
| 15 |
+
| Python | 3.10+ | Conda recommended |
|
| 16 |
+
| Git | 2.30+ | Any modern version |
|
| 17 |
+
| Conda | 23.1+ | Mamba also fine |
|
| 18 |
+
| OS | Linux / MacOS / Windows | CPU run (no GPU needed) |
|
| 19 |
+
| Disk | ~1 GB | Dataset + artifacts |
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## 2. Clone Repository
|
| 24 |
+
|
| 25 |
+
```bash
|
| 26 |
+
git clone https://github.com/dev-jaser/ai-ml-polymer-aging-prediction.git
|
| 27 |
+
cd ai-ml-polymer-aging-prediction
|
| 28 |
+
git checkout main
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
---
|
| 32 |
+
|
| 33 |
+
## 3. Create & Activate Conda Environment
|
| 34 |
+
|
| 35 |
+
```bash
|
| 36 |
+
conda env create -f environment.yml
|
| 37 |
+
conda activate polymer_env
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
> **Tip:** If you already created `polymer_env` just run `conda activate polymer_env`
|
| 41 |
+
|
| 42 |
+
---
|
| 43 |
+
|
| 44 |
+
## 4. Download RDWP Raman Dataset
|
| 45 |
+
|
| 46 |
+
1. Visit https://data.mendeley.com/datasets/kpygrf9fg6/1
|
| 47 |
+
2. Download the archive (**RDWP.zip or similar**) by clicking `Download All 10.3 MB`
|
| 48 |
+
3. Extract all `*.txt` Raman files into:
|
| 49 |
+
|
| 50 |
+
```bash
|
| 51 |
+
ai-ml-polymer-aging-prediction/datasets/rdwp
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
4. Quick sanity check:
|
| 55 |
+
|
| 56 |
+
```bash
|
| 57 |
+
ls datasets/rdwp | grep ".txt" | wc -l # -> 170+ files expected
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
---
|
| 61 |
+
|
| 62 |
+
## 5. Validate the Entire Pipeline
|
| 63 |
+
|
| 64 |
+
Run the canonical smoke-test harness:
|
| 65 |
+
|
| 66 |
+
```bash
|
| 67 |
+
./validate_pipeline.sh
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
Successful run prints:
|
| 71 |
+
|
| 72 |
+
```bash
|
| 73 |
+
[PASS] Preprocessing
|
| 74 |
+
[PASS] Training & artifacts
|
| 75 |
+
[PASS] Inference
|
| 76 |
+
[PASS] Plotting
|
| 77 |
+
All validation checks passed!
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
Artifacts created:
|
| 81 |
+
|
| 82 |
+
```bash
|
| 83 |
+
outputs/figure2_model.pth
|
| 84 |
+
outputs/logs/raman_figure2_diagnostics.json
|
| 85 |
+
outputs/inference/test_prediction.json
|
| 86 |
+
outputs/plots/validation_plot.png
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
---
|
| 90 |
+
|
| 91 |
+
## 6. Optional: Train ResNet Variant
|
| 92 |
+
|
| 93 |
+
```bash
|
| 94 |
+
python scripts/train_model.py --model resnet --target-len 4000 --baseline --smooth --normalize
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
Check that these exist now:
|
| 98 |
+
|
| 99 |
+
```bash
|
| 100 |
+
outputs/resnet_model.pth
|
| 101 |
+
outputs/logs/raman_resnet_diagnostics.json
|
| 102 |
+
```
|
| 103 |
+
|
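To inspect the logged metrics without opening the JSON by hand, a short Python snippet (reading the diagnostics file written by `scripts/train_model.py`) prints the overall and per-fold accuracies:

```python
# Quick look at the ResNet diagnostics written by scripts/train_model.py
import json

with open("outputs/logs/raman_resnet_diagnostics.json", encoding="utf-8") as f:
    log = json.load(f)

overall = log["overall"]
print(f"Mean accuracy: {overall['mean_accuracy']:.2f}% ± {overall['std_accuracy']:.2f}%")
for fold in log["fold_metrics"]:
    print(f"Fold {fold['fold']}: {fold['accuracy']:.2f}%")
```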
| 104 |
+
---
|
| 105 |
+
|
| 106 |
+
## 7. Clean-up & Re-Run
|
| 107 |
+
|
| 108 |
+
To re-run from a clean state:
|
| 109 |
+
|
| 110 |
+
```bash
|
| 111 |
+
rm -rf outputs/*
|
| 112 |
+
./validate_pipeline.sh
|
| 113 |
+
```
|
| 114 |
+
|
| 115 |
+
All artifacts will be regenerated.
|
| 116 |
+
|
| 117 |
+
---
|
| 118 |
+
|
| 119 |
+
## 8. Troubleshooting
|
| 120 |
+
|
| 121 |
+
| Symptom | Likely Cause | Fix |
|
| 122 |
+
|---------|--------------|-----|
|
| 123 |
+
| `ModuleNotFoundError` during scripts| `conda activate polymer_env` not done | Activate env|
|
| 124 |
+
| `CUDA not available` warning | Running on CPU | Safe to ignore |
|
| 125 |
+
| Fewer than 170 files in `datasets/rdwp` | Incomplete extract | Re-download archive |
|
| 126 |
+
| `validate_pipeline.sh: Permission denied` | Missing executable bit | `chmod +x validate_pipeline.sh` |
|
| 127 |
+
|
| 128 |
+
---
|
| 129 |
+
|
| 130 |
+
## 9. Contact
|
| 131 |
+
|
| 132 |
+
For issues or questions, open an Issue in the GitHub repo or contact @dev-jaser
|
models/__init__.py
ADDED
|
File without changes
|
models/figure2_cnn.py
ADDED
|
@@ -0,0 +1,77 @@
|
| 1 |
+
# 📌 MODEL DESIGNATION:
|
| 2 |
+
# Figure2CNN is validated ONLY for RAMAN spectra input.
|
| 3 |
+
# Any use for FTIR modeling is invalid and deprecated.
|
| 4 |
+
# See milestone: @figure2cnn-raman-only-milestone
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class Figure2CNN(nn.Module):
|
| 11 |
+
"""
|
| 12 |
+
CNN architecture based on Figure 2 of the referenced research paper.
|
| 13 |
+
Designed for 1D spectral data input of length 500
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
def __init__(self, input_length=500, input_channels=1):
|
| 17 |
+
super(Figure2CNN, self).__init__()
|
| 18 |
+
|
| 19 |
+
self.input_channels = input_channels
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
self.conv_block = nn.Sequential(
|
| 23 |
+
nn.Conv1d(input_channels, 16, kernel_size=5, padding=2),
|
| 24 |
+
nn.ReLU(),
|
| 25 |
+
nn.MaxPool1d(kernel_size=2),
|
| 26 |
+
|
| 27 |
+
nn.Conv1d(16, 32, kernel_size=5, padding=2),
|
| 28 |
+
nn.ReLU(),
|
| 29 |
+
nn.MaxPool1d(kernel_size=2),
|
| 30 |
+
|
| 31 |
+
nn.Conv1d(32, 64, kernel_size=5, padding=2),
|
| 32 |
+
nn.ReLU(),
|
| 33 |
+
nn.MaxPool1d(kernel_size=2),
|
| 34 |
+
|
| 35 |
+
nn.Conv1d(64, 128, kernel_size=5, padding=2),
|
| 36 |
+
nn.ReLU(),
|
| 37 |
+
nn.MaxPool1d(kernel_size=2),
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
# Dynamically calculate flattened size after conv + pooling
|
| 41 |
+
self.flattened_size = self._get_flattened_size(input_channels, input_length)
|
| 42 |
+
|
| 43 |
+
self.classifier = nn.Sequential(
|
| 44 |
+
nn.Linear(self.flattened_size, 256),
|
| 45 |
+
nn.ReLU(),
|
| 46 |
+
nn.Linear(256, 128),
|
| 47 |
+
nn.ReLU(),
|
| 48 |
+
nn.Linear(128, 2) # Binary output
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
def _get_flattened_size(self, input_channels, input_length):
|
| 52 |
+
with torch.no_grad():
|
| 53 |
+
dummy_input = torch.zeros(1, input_channels, input_length)
|
| 54 |
+
out = self.conv_block(dummy_input)
|
| 55 |
+
return out.view(1, -1).shape[1]
|
| 56 |
+
|
| 57 |
+
def forward(self, x):
|
| 58 |
+
"""
|
| 59 |
+
Defines the forward pass of the Figure2CNN model.
|
| 60 |
+
|
| 61 |
+
Args:
|
| 62 |
+
x (torch.Tensor): Input tensor of shape (batch_size, channels, input_length).
|
| 63 |
+
|
| 64 |
+
Returns:
|
| 65 |
+
torch.Tensor: Output tensor containing class scores.
|
| 66 |
+
"""
|
| 67 |
+
x = self.conv_block(x)
|
| 68 |
+
x = x.view(x.size(0), -1) # Flatten
|
| 69 |
+
return self.classifier(x)
|
| 70 |
+
|
| 71 |
+
def describe_model(self):
|
| 72 |
+
"""Print architecture and flattened size (for debug). """
|
| 73 |
+
print(r"\n Model Summary:")
|
| 74 |
+
print(r" - Conv Block: 4 Layers")
|
| 75 |
+
print(f" - Input length: {self.flattened_size} after conv/pool")
|
| 76 |
+
print(f" - Classifier: {self.classifier}\n")
|
| 77 |
+
|
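A minimal shape-check sketch for `Figure2CNN` (not part of the committed file; it assumes the repo root is the working directory so `models.figure2_cnn` resolves):

```python
# Hypothetical smoke test for Figure2CNN; run from the repo root.
import torch

from models.figure2_cnn import Figure2CNN

model = Figure2CNN(input_length=500, input_channels=1)
dummy = torch.zeros(4, 1, 500)   # batch of 4 spectra, 1 channel, 500 resampled points
logits = model(dummy)

print(logits.shape)              # expected: torch.Size([4, 2]) -> two-class scores
model.describe_model()           # prints the conv block / classifier summary
```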
models/resnet_cnn.py
ADDED
|
@@ -0,0 +1,70 @@
|
| 1 |
+
"""
|
| 2 |
+
📌 MODEL DESIGNATION:
|
| 3 |
+
ResNet1D is validated ONLY for RAMAN spectra input.
|
| 4 |
+
Any use for FTIR modeling is invalid and deprecated.
|
| 5 |
+
See milestone: @figure2cnn-raman-only-milestone
|
| 6 |
+
"""
|
| 7 |
+
import torch
|
| 8 |
+
import torch.nn as nn
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class ResidualBlock1D(nn.Module):
|
| 12 |
+
"""
|
| 13 |
+
Basic 1-D residual block:
|
| 14 |
+
Conv1d -> ReLU -> Conv1d (+ skip connection).
|
| 15 |
+
If channel count changes, a 1x1 Conv aligns the skip path.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3):
|
| 19 |
+
super().__init__()
|
| 20 |
+
padding = kernel_size // 2
|
| 21 |
+
|
| 22 |
+
self.conv1 = nn.Conv1d(in_channels, out_channels,
|
| 23 |
+
kernel_size, padding=padding)
|
| 24 |
+
self.relu = nn.ReLU(inplace=True)
|
| 25 |
+
self.conv2 = nn.Conv1d(out_channels, out_channels,
|
| 26 |
+
kernel_size, padding=padding)
|
| 27 |
+
|
| 28 |
+
self.skip = (
|
| 29 |
+
nn.Identity()
|
| 30 |
+
if in_channels == out_channels
|
| 31 |
+
else nn.Conv1d(in_channels, out_channels, kernel_size=1)
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 35 |
+
identity = self.skip(x)
|
| 36 |
+
out = self.relu(self.conv1(x))
|
| 37 |
+
out = self.conv2(out)
|
| 38 |
+
return self.relu(out + identity)
|
| 39 |
+
|
| 40 |
+
def describe_model(self):
|
| 41 |
+
"""Print architecture and flattened size (for debug). """
|
| 42 |
+
print(r"\n Model Summary:")
|
| 43 |
+
print(r" - Conv Block: 4 Layers")
|
| 44 |
+
print(f" - Input length: {self.flattened_size} after conv/pool")
|
| 45 |
+
print(f" - Classifier: {self.classifier}\n")
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class ResNet1D(nn.Module):
|
| 49 |
+
"""
|
| 50 |
+
Lightweight 1-D ResNet for Raman spectra (length 500, single channel).
|
| 51 |
+
"""
|
| 52 |
+
|
| 53 |
+
def __init__(self, input_length: int = 500, num_classes: int = 2):
|
| 54 |
+
super().__init__()
|
| 55 |
+
|
| 56 |
+
# Three residual stages
|
| 57 |
+
self.stage1 = ResidualBlock1D(1, 16)
|
| 58 |
+
self.stage2 = ResidualBlock1D(16, 32)
|
| 59 |
+
self.stage3 = ResidualBlock1D(32, 64)
|
| 60 |
+
|
| 61 |
+
# Global aggregation + classifier
|
| 62 |
+
self.global_pool = nn.AdaptiveAvgPool1d(1) # -> [B, 64, 1]
|
| 63 |
+
self.fc = nn.Linear(64, num_classes)
|
| 64 |
+
|
| 65 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 66 |
+
x = self.stage1(x)
|
| 67 |
+
x = self.stage2(x)
|
| 68 |
+
x = self.stage3(x)
|
| 69 |
+
x = self.global_pool(x).squeeze(-1) # -> [B, 64]
|
| 70 |
+
return self.fc(x)
|
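The same kind of shape check applies to `ResNet1D` (a sketch, with the same repo-root import assumption):

```python
# Hypothetical smoke test for ResNet1D; run from the repo root.
import torch

from models.resnet_cnn import ResNet1D

model = ResNet1D(input_length=500, num_classes=2)
dummy = torch.randn(2, 1, 500)   # batch of 2 spectra, 1 channel, 500 resampled points
logits = model(dummy)

print(logits.shape)              # expected: torch.Size([2, 2])
```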
outputs/resnet_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f1d1b5541ade480077eeae8c627b8e2372076cc52f0be4e69a3b063895653a9
|
| 3 |
+
size 114450
|
scripts/__init__.py
ADDED
|
File without changes
|
scripts/discover_raman_files.py
ADDED
|
@@ -0,0 +1,54 @@
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def list_txt_files(root_dir):
|
| 7 |
+
"""Recursively lists all .txt files in a directory."""
|
| 8 |
+
txt_files = []
|
| 9 |
+
for dirpath, _, filenames in os.walk(root_dir):
|
| 10 |
+
for file in filenames:
|
| 11 |
+
if file.endswith(".txt"):
|
| 12 |
+
full_path = os.path.join(dirpath, file)
|
| 13 |
+
txt_files.append(full_path)
|
| 14 |
+
return txt_files
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def label_file(filepath):
|
| 18 |
+
"""
|
| 19 |
+
Assigns label based on filename prefix:
|
| 20 |
+
- 'sta-' => 0 (pristine)
|
| 21 |
+
- 'wea-' => 1 (weathered)
|
| 22 |
+
Returns None if prefix is unknown.
|
| 23 |
+
"""
|
| 24 |
+
filename = os.path.basename(filepath).lower()
|
| 25 |
+
if filename.startswith("sta-"):
|
| 26 |
+
return 0
|
| 27 |
+
elif filename.startswith("wea-"):
|
| 28 |
+
return 1
|
| 29 |
+
else:
|
| 30 |
+
return None # Unknown or irrelevant
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
if __name__ == "__main__":
|
| 34 |
+
dataset_dir = os.path.join(
|
| 35 |
+
"datasets", "rdwp",
|
| 36 |
+
"A Raman database of microplastics weathered under natural environments"
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
txt_paths = list_txt_files(dataset_dir)
|
| 40 |
+
|
| 41 |
+
print(f"Found {len(txt_paths)} .txt files.")
|
| 42 |
+
print("Sample Files: ")
|
| 43 |
+
for path in txt_paths[:5]:
|
| 44 |
+
print(" -", path)
|
| 45 |
+
|
| 46 |
+
labeled_files = []
|
| 47 |
+
for path in txt_paths:
|
| 48 |
+
label = label_file(path)
|
| 49 |
+
if label is not None:
|
| 50 |
+
labeled_files.append((path, label))
|
| 51 |
+
|
| 52 |
+
print(f"\nLabeled {len(labeled_files)} files:")
|
| 53 |
+
for path, label in labeled_files[:5]:
|
| 54 |
+
print(f" - {os.path.basename(path)} => Label: {label}")
|
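The filename-prefix labeling above can be exercised in isolation; a tiny sketch with made-up filenames:

```python
# Illustrative check of the prefix-based labeling (filenames are hypothetical).
from scripts.discover_raman_files import label_file

print(label_file("sta-001.txt"))   # 0 -> stable / pristine
print(label_file("wea-001.txt"))   # 1 -> weathered
print(label_file("notes.txt"))     # None -> ignored by the pipeline
```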
scripts/list_spectra.py
ADDED
|
@@ -0,0 +1,77 @@
|
| 1 |
+
"""
|
| 2 |
+
list_spectra.py
|
| 3 |
+
|
| 4 |
+
This script provides functionality to recursively list all `.txt` files
|
| 5 |
+
within a specified directory. It is designed to assist in managing and
|
| 6 |
+
exploring datasets, particularly for Raman spectrum data stored in text files.
|
| 7 |
+
|
| 8 |
+
Functions:
|
| 9 |
+
- list_txt_files(root_dir): Recursively finds and returns a list of all `.txt`
|
| 10 |
+
files in the given directory.
- label_file(filepath): Assigns a label based on the filename prefix ('sta-' => 0, 'wea-' => 1).
|
| 11 |
+
|
| 12 |
+
Usage:
|
| 13 |
+
- The script can be executed directly to list `.txt` files in a predefined
|
| 14 |
+
dataset directory and print a summary, including the total count and a
|
| 15 |
+
sample of file paths.
|
| 16 |
+
|
| 17 |
+
Example:
|
| 18 |
+
$ python list_spectra.py
|
| 19 |
+
Found 100 .txt files.
|
| 20 |
+
Sample Files:
|
| 21 |
+
- datasets/rdwp/.../file1.txt
|
| 22 |
+
- datasets/rdwp/.../file2.txt
|
| 23 |
+
"""
|
| 24 |
+
import sys
|
| 25 |
+
import os
|
| 26 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def list_txt_files(root_dir):
|
| 30 |
+
"""Recursively lists all .txt files in a directory."""
|
| 31 |
+
txt_files = []
|
| 32 |
+
for dirpath, _, filenames in os.walk(root_dir):
|
| 33 |
+
for file in filenames:
|
| 34 |
+
if file.endswith(".txt"):
|
| 35 |
+
full_path = os.path.join(dirpath, file)
|
| 36 |
+
txt_files.append(full_path)
|
| 37 |
+
return txt_files
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def label_file(filepath):
|
| 41 |
+
"""
|
| 42 |
+
Assigns label based on filename prefix:
|
| 43 |
+
- 'sta-' => 0 (pristine)
|
| 44 |
+
- 'wea-' => 1 (weathered)
|
| 45 |
+
Returns None if prefix is unknown.
|
| 46 |
+
"""
|
| 47 |
+
filename = os.path.basename(filepath).lower()
|
| 48 |
+
if filename.startswith("sta-"):
|
| 49 |
+
return 0
|
| 50 |
+
elif filename.startswith("wea-"):
|
| 51 |
+
return 1
|
| 52 |
+
else:
|
| 53 |
+
return None # Unknown or irrelevant
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
if __name__ == "__main__":
|
| 57 |
+
dataset_dir = os.path.join(
|
| 58 |
+
"datasets", "rdwp",
|
| 59 |
+
"A Raman database of microplastics weathered under natural environments"
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
txt_paths = list_txt_files(dataset_dir)
|
| 63 |
+
|
| 64 |
+
print(f"Found {len(txt_paths)} .txt files.")
|
| 65 |
+
print("Sample Files: ")
|
| 66 |
+
for path in txt_paths[:5]:
|
| 67 |
+
print(" -", path)
|
| 68 |
+
|
| 69 |
+
labeled_files = []
|
| 70 |
+
for path in txt_paths:
|
| 71 |
+
label = label_file(path)
|
| 72 |
+
if label is not None:
|
| 73 |
+
labeled_files.append((path, label))
|
| 74 |
+
|
| 75 |
+
print(f"\nLabeled {len(labeled_files)} files:")
|
| 76 |
+
for path, label in labeled_files[:5]:
|
| 77 |
+
print(f" - {os.path.basename(path)} => Label: {label}")
|
scripts/plot_spectrum.py
ADDED
|
@@ -0,0 +1,71 @@
|
| 1 |
+
"""
|
| 2 |
+
plot_spectrum.py
|
| 3 |
+
|
| 4 |
+
This script provides functionality to load and plot Raman spectra from two-column `.txt` files.
|
| 5 |
+
|
| 6 |
+
Functions:
|
| 7 |
+
- load_spectrum(filepath): Reads a spectrum file and extracts Raman shift and intensity values.
|
| 8 |
+
- plot_spectrum(x, y, title): Plots the Raman spectrum with basic styling.
|
| 9 |
+
|
| 10 |
+
Command-line Usage:
|
| 11 |
+
The script can be run directly; pass the spectrum path with `--input` and optionally use `--output` to save the plot as a PNG instead of displaying it.
|
| 12 |
+
|
| 13 |
+
Dependencies:
|
| 14 |
+
- os: For file path operations.
|
| 15 |
+
- matplotlib.pyplot: For plotting the spectrum.
|
| 16 |
+
|
| 17 |
+
Example:
|
| 18 |
+
python plot_spectrum.py --input path/to/spectrum.txt [--output plot.png]
|
| 19 |
+
|
| 20 |
+
"""
|
| 21 |
+
import sys
|
| 22 |
+
import os
|
| 23 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 24 |
+
import matplotlib.pyplot as plt
|
| 25 |
+
|
| 26 |
+
def load_spectrum(filepath):
|
| 27 |
+
"""Loads a Raman spectrum from a two-column .txt file."""
|
| 28 |
+
x_vals, y_vals = [], []
|
| 29 |
+
with open(filepath, 'r', encoding='utf-8') as file:
|
| 30 |
+
for line in file:
|
| 31 |
+
parts = line.strip().split()
|
| 32 |
+
if len(parts) == 2:
|
| 33 |
+
try:
|
| 34 |
+
x, y = float(parts[0]), float(parts[1])
|
| 35 |
+
x_vals.append(x)
|
| 36 |
+
y_vals.append(y)
|
| 37 |
+
except ValueError:
|
| 38 |
+
continue # Skip lines that can't be converted
|
| 39 |
+
return x_vals, y_vals
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def plot_spectrum(x, y, title="Raman Spectrum"):
|
| 43 |
+
"""Plots the spectrum data with basic styling."""
|
| 44 |
+
plt.figure(figsize=(10, 5))
|
| 45 |
+
plt.plot(x, y, linewidth=1.5)
|
| 46 |
+
plt.xlabel("Raman Shift (cm⁻¹)")
|
| 47 |
+
plt.ylabel("Intensity (a.u.)")
|
| 48 |
+
plt.title(title)
|
| 49 |
+
plt.grid(True, linestyle='--', alpha=0.6)
|
| 50 |
+
plt.tight_layout()
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
if __name__ == "__main__":
|
| 54 |
+
import argparse
|
| 55 |
+
|
| 56 |
+
parser = argparse.ArgumentParser(description="Plot a Raman spectrum from a .txt file.")
|
| 57 |
+
parser.add_argument("--input", type=str, required=True, help="Path to input .txt file")
|
| 58 |
+
parser.add_argument("--output", type=str, required=False, help="Path to save .png image")
|
| 59 |
+
|
| 60 |
+
args = parser.parse_args()
|
| 61 |
+
spectrum_file = args.input
|
| 62 |
+
output_file = args.output
|
| 63 |
+
|
| 64 |
+
x, y = load_spectrum(spectrum_file)
|
| 65 |
+
plot_spectrum(x, y, title=os.path.basename(spectrum_file))
|
| 66 |
+
|
| 67 |
+
if output_file:
|
| 68 |
+
plt.savefig(output_file)
|
| 69 |
+
print(f"✅ Plot saved to {output_file}")
|
| 70 |
+
else:
|
| 71 |
+
plt.show()
|
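Besides the CLI, the helpers can be used programmatically; a minimal sketch (the input path is a placeholder):

```python
# Hypothetical programmatic use of load_spectrum / plot_spectrum; the path is a placeholder.
import os

import matplotlib.pyplot as plt

from scripts.plot_spectrum import load_spectrum, plot_spectrum

x, y = load_spectrum("datasets/rdwp/sta-10.txt")   # any two-column Raman .txt file
plot_spectrum(x, y, title="sta-10.txt")

os.makedirs("outputs/plots", exist_ok=True)
plt.savefig("outputs/plots/sta-10.png")            # or plt.show() for interactive viewing
```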
scripts/preprocess_dataset.py
ADDED
|
@@ -0,0 +1,121 @@
|
| 1 |
+
"""
|
| 2 |
+
This script preprocesses a dataset of spectra by resampling and labeling the data.
|
| 3 |
+
|
| 4 |
+
Functions:
|
| 5 |
+
- resample_spectrum(x, y, target_len): Resamples a spectrum to a fixed number of points.
|
| 6 |
+
- preprocess_dataset(...): Loads, resamples, and applies optional preprocessing steps:
|
| 7 |
+
- baseline correction
|
| 8 |
+
- Savitzky-Golay smoothing
|
| 9 |
+
- min-max normalization
|
| 10 |
+
|
| 11 |
+
The script expects the dataset directory to contain text files representing spectra.
|
| 12 |
+
Each file is:
|
| 13 |
+
1. Listed using `list_txt_files()`
|
| 14 |
+
2. Labeled using `label_file()`
|
| 15 |
+
3. Loaded using `load_spectrum()`
|
| 16 |
+
4. Resampled and optionally cleaned
|
| 17 |
+
5. Returned as arrays suitable for ML training
|
| 18 |
+
|
| 19 |
+
Dependencies:
|
| 20 |
+
- numpy
|
| 21 |
+
- scipy.interpolate, scipy.signal
|
| 22 |
+
- sklearn.preprocessing
|
| 23 |
+
- list_spectra (custom)
|
| 24 |
+
- plot_spectrum (custom)
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
import os
|
| 28 |
+
import sys
|
| 29 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 30 |
+
import numpy as np
|
| 31 |
+
from scipy.interpolate import interp1d
|
| 32 |
+
from scipy.signal import savgol_filter
|
| 33 |
+
from sklearn.preprocessing import minmax_scale
|
| 34 |
+
from scripts.discover_raman_files import list_txt_files, label_file
|
| 35 |
+
from scripts.plot_spectrum import load_spectrum
|
| 36 |
+
|
| 37 |
+
# Default resample target
|
| 38 |
+
TARGET_LENGTH = 500
|
| 39 |
+
|
| 40 |
+
# Optional preprocessing steps
|
| 41 |
+
def remove_baseline(y):
|
| 42 |
+
"""Simple baseline correction using polynomial fitting (order 2)"""
|
| 43 |
+
x = np.arange(len(y))
|
| 44 |
+
coeffs = np.polyfit(x, y, deg=2)
|
| 45 |
+
baseline = np.polyval(coeffs, x)
|
| 46 |
+
return y - baseline
|
| 47 |
+
|
| 48 |
+
def normalize_spectrum(y):
|
| 49 |
+
"""Min-max normalization to [0, 1]"""
|
| 50 |
+
return minmax_scale(y)
|
| 51 |
+
|
| 52 |
+
def smooth_spectrum(y, window_length=11, polyorder=2):
|
| 53 |
+
"""Apply Savitzky-Golay smoothing."""
|
| 54 |
+
return savgol_filter(y, window_length, polyorder)
|
| 55 |
+
|
| 56 |
+
def resample_spectrum(x, y, target_len=TARGET_LENGTH):
|
| 57 |
+
"""Resample a spectrum to a fixed number of points."""
|
| 58 |
+
f_interp = interp1d(x, y, kind='linear', fill_value='extrapolate')
|
| 59 |
+
x_uniform = np.linspace(min(x), max(x), target_len)
|
| 60 |
+
y_uniform = f_interp(x_uniform)
|
| 61 |
+
return y_uniform
|
| 62 |
+
|
| 63 |
+
def preprocess_dataset(
|
| 64 |
+
dataset_dir,
|
| 65 |
+
target_len=500,
|
| 66 |
+
baseline_correction=False,
|
| 67 |
+
apply_smoothing=False,
|
| 68 |
+
normalize=False
|
| 69 |
+
):
|
| 70 |
+
"""
|
| 71 |
+
Load, resample, and preprocess all valid spectra in the dataset.
|
| 72 |
+
|
| 73 |
+
Args:
|
| 74 |
+
dataset_dir (str): Path to the dataset
|
| 75 |
+
target_len (int): Number of points to resample to
|
| 76 |
+
baseline_correction (bool): Whether to apply baseline removal
|
| 77 |
+
apply_smoothing (bool): Whether to apply Savitzky-Golay smoothing
|
| 78 |
+
normalize (bool): Whether to apply min-max normalization
|
| 79 |
+
|
| 80 |
+
Returns:
|
| 81 |
+
X (np.ndarray): Preprocessed spectra
|
| 82 |
+
y (np.ndarray): Corresponding labels
|
| 83 |
+
"""
|
| 84 |
+
txt_paths = list_txt_files(dataset_dir)
|
| 85 |
+
X, y_labels = [], []
|
| 86 |
+
|
| 87 |
+
for path in txt_paths:
|
| 88 |
+
label = label_file(path)
|
| 89 |
+
if label is None:
|
| 90 |
+
continue
|
| 91 |
+
|
| 92 |
+
x_raw, y_raw = load_spectrum(path)
|
| 93 |
+
if len(x_raw) < 10:
|
| 94 |
+
continue # Skip files with too few points
|
| 95 |
+
|
| 96 |
+
# Resample
|
| 97 |
+
y_processed = resample_spectrum(x_raw, y_raw, target_len=target_len)
|
| 98 |
+
|
| 99 |
+
# Optional preprocessing
|
| 100 |
+
if baseline_correction:
|
| 101 |
+
y_processed = remove_baseline(y_processed)
|
| 102 |
+
if apply_smoothing:
|
| 103 |
+
y_processed = smooth_spectrum(y_processed)
|
| 104 |
+
if normalize:
|
| 105 |
+
y_processed = normalize_spectrum(y_processed)
|
| 106 |
+
|
| 107 |
+
X.append(y_processed)
|
| 108 |
+
y_labels.append(label)
|
| 109 |
+
|
| 110 |
+
return np.array(X), np.array(y_labels)
|
| 111 |
+
|
| 112 |
+
# Optional: Run directly for testing
|
| 113 |
+
if __name__ == "__main__":
|
| 114 |
+
dataset_dir = os.path.join(
|
| 115 |
+
"datasets", "rdwp"
|
| 116 |
+
)
|
| 117 |
+
X, y = preprocess_dataset(dataset_dir)
|
| 118 |
+
|
| 119 |
+
print(f"X shape: {X.shape}")
|
| 120 |
+
print(f"y shape: {y.shape}")
|
| 121 |
+
print(f"Label distribution: {np.bincount(y)}")
|
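The per-spectrum helpers can also be chained directly, in the same order the CLI flags imply (resample, then baseline correction, smoothing, normalization). A sketch on synthetic data, independent of the RDWP files:

```python
# Synthetic example of the per-spectrum preprocessing chain (illustrative only).
import numpy as np

from scripts.preprocess_dataset import (
    resample_spectrum, remove_baseline, smooth_spectrum, normalize_spectrum
)

# Fake Raman axis (cm^-1) and intensity: sloping baseline + one Gaussian peak + noise.
x = np.linspace(200, 3500, 1200)
y = 0.001 * x + np.exp(-((x - 1600) ** 2) / (2 * 30 ** 2)) + 0.02 * np.random.randn(x.size)

y_proc = resample_spectrum(x, y, target_len=500)   # fixed-length grid
y_proc = remove_baseline(y_proc)                   # order-2 polynomial baseline removal
y_proc = smooth_spectrum(y_proc)                   # Savitzky-Golay smoothing
y_proc = normalize_spectrum(y_proc)                # min-max scaling to [0, 1]

print(y_proc.shape, float(y_proc.min()), float(y_proc.max()))   # (500,) 0.0 1.0
```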
scripts/run_inference.py
ADDED
|
@@ -0,0 +1,136 @@
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 4 |
+
|
| 5 |
+
import argparse
|
| 6 |
+
import warnings
|
| 7 |
+
import logging
|
| 8 |
+
|
| 9 |
+
import numpy as np
|
| 10 |
+
import torch
|
| 11 |
+
|
| 12 |
+
from models.figure2_cnn import Figure2CNN
|
| 13 |
+
from scripts.preprocess_dataset import resample_spectrum, label_file
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# =============================================
|
| 17 |
+
# ✅ Raman-Only Inference Script
|
| 18 |
+
# This script supports prediction on a single Raman spectrum (.txt file).
|
| 19 |
+
# FTIR inference has been deprecated and removed for scientific integrity.
|
| 20 |
+
# See: @raman-pipeline-focus-milestone
|
| 21 |
+
# =============================================
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
warnings.filterwarnings(
|
| 25 |
+
"ignore",
|
| 26 |
+
message=".*weights_only=False.*",
|
| 27 |
+
category=FutureWarning
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def load_raman_spectrum(filepath):
|
| 32 |
+
"""Load a 2-column Raman spectrum from a .txt file"""
|
| 33 |
+
x_vals, y_vals = [], []
|
| 34 |
+
with open(filepath, 'r', encoding='utf-8') as f:
|
| 35 |
+
for line in f:
|
| 36 |
+
parts = line.strip().split()
|
| 37 |
+
if len(parts) == 2:
|
| 38 |
+
try:
|
| 39 |
+
x, y = float(parts[0]), float(parts[1])
|
| 40 |
+
x_vals.append(x)
|
| 41 |
+
y_vals.append(y)
|
| 42 |
+
except ValueError:
|
| 43 |
+
continue
|
| 44 |
+
return np.array(x_vals), np.array(y_vals)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
if __name__ == "__main__":
|
| 48 |
+
parser = argparse.ArgumentParser(
|
| 49 |
+
description="Run inference on a single Raman spectrum (.txt file)."
|
| 50 |
+
)
|
| 51 |
+
parser.add_argument(
|
| 52 |
+
"--target-len", type=int, required=True,
|
| 53 |
+
help="Target length to match model input"
|
| 54 |
+
)
|
| 55 |
+
parser.add_argument(
|
| 56 |
+
"--input", required=True,
|
| 57 |
+
help="Path to Raman .txt file."
|
| 58 |
+
)
|
| 59 |
+
parser.add_argument(
|
| 60 |
+
"--model", default="random",
|
| 61 |
+
help="Path to .pth model file, or specify 'random' to use untrained weights."
|
| 62 |
+
)
|
| 63 |
+
parser.add_argument(
|
| 64 |
+
"--output", default=None,
|
| 65 |
+
help="Where to write prediction result. If omitted, prints to stdout."
|
| 66 |
+
)
|
| 67 |
+
verbosity = parser.add_mutually_exclusive_group()
|
| 68 |
+
verbosity.add_argument(
|
| 69 |
+
"--quiet", action="store_true",
|
| 70 |
+
help="Show only warnings and errors"
|
| 71 |
+
)
|
| 72 |
+
verbosity.add_argument(
|
| 73 |
+
"--verbose", action="store_true",
|
| 74 |
+
help="Show debug-level logging"
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
args = parser.parse_args()
|
| 78 |
+
|
| 79 |
+
# configure logging
|
| 80 |
+
level = logging.INFO
|
| 81 |
+
if args.verbose:
|
| 82 |
+
level = logging.DEBUG
|
| 83 |
+
elif args.quiet:
|
| 84 |
+
level = logging.WARNING
|
| 85 |
+
logging.basicConfig(level=level, format="%(levelname)s: %(message)s")
|
| 86 |
+
|
| 87 |
+
try:
|
| 88 |
+
# Load & preprocess Raman spectrum
|
| 89 |
+
if os.path.isdir(args.input):
|
| 90 |
+
parser.error(f"Input must be a single Raman .txt file, got a directory: {args.input}")
|
| 91 |
+
|
| 92 |
+
x_raw, y_raw = load_raman_spectrum(args.input)
|
| 93 |
+
if len(x_raw) < 10:
|
| 94 |
+
parser.error("Spectrum too short for inference.")
|
| 95 |
+
|
| 96 |
+
data = resample_spectrum(x_raw, y_raw, target_len=args.target_len)
|
| 97 |
+
# Shape = (1, 1, target_len) — valid input for Raman inference
|
| 98 |
+
input_tensor = torch.tensor(data, dtype=torch.float32).unsqueeze(0).unsqueeze(0)
|
| 99 |
+
|
| 100 |
+
# 2. Load Model
|
| 101 |
+
model = Figure2CNN(
|
| 102 |
+
input_length=args.target_len,
|
| 103 |
+
input_channels=1
|
| 104 |
+
)
|
| 105 |
+
if args.model != "random":
|
| 106 |
+
model.load_state_dict(
|
| 107 |
+
torch.load(args.model, map_location="cpu", weights_only=True)
|
| 108 |
+
)
|
| 109 |
+
model.eval()
|
| 110 |
+
|
| 111 |
+
# 3. Inference
|
| 112 |
+
with torch.no_grad():
|
| 113 |
+
logits = model(input_tensor)
|
| 114 |
+
pred = torch.argmax(logits, dim=1).item()
|
| 115 |
+
|
| 116 |
+
# 4. True Label
|
| 117 |
+
try:
|
| 118 |
+
true_label = label_file(args.input)
|
| 119 |
+
label_str = f"True Label: {true_label}"
|
| 120 |
+
except FileNotFoundError:
|
| 121 |
+
label_str = "True Label: Unknown"
|
| 122 |
+
|
| 123 |
+
result = f"Predicted Label: {pred} {label_str}\nRaw Logits: {logits.tolist()}"
|
| 124 |
+
logging.info(result)
|
| 125 |
+
|
| 126 |
+
# 5. Save or stdout
|
| 127 |
+
if args.output:
|
| 128 |
+
with open(args.output, "w", encoding="utf-8") as fout:
|
| 129 |
+
fout.write(result)
|
| 130 |
+
logging.info("Result saved to %s", args.output)
|
| 131 |
+
|
| 132 |
+
sys.exit(0)
|
| 133 |
+
|
| 134 |
+
except Exception as e:
|
| 135 |
+
logging.error(e)
|
| 136 |
+
sys.exit(1)
|
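The same flow can be driven from Python instead of the CLI; a minimal sketch (the spectrum and checkpoint paths are placeholders, mirroring the validation harness):

```python
# Hypothetical programmatic equivalent of the CLI flow above (paths are placeholders).
import torch

from models.figure2_cnn import Figure2CNN
from scripts.preprocess_dataset import resample_spectrum
from scripts.run_inference import load_raman_spectrum

TARGET_LEN = 500
x_raw, y_raw = load_raman_spectrum("datasets/rdwp/wea-100.txt")
data = resample_spectrum(x_raw, y_raw, target_len=TARGET_LEN)
inputs = torch.tensor(data, dtype=torch.float32).unsqueeze(0).unsqueeze(0)  # (1, 1, 500)

model = Figure2CNN(input_length=TARGET_LEN, input_channels=1)
model.load_state_dict(
    torch.load("outputs/figure2_model.pth", map_location="cpu", weights_only=True)
)
model.eval()

with torch.no_grad():
    pred = torch.argmax(model(inputs), dim=1).item()

print("Predicted label:", pred)   # 0 = pristine ('sta-'), 1 = weathered ('wea-')
```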
scripts/train_model.py
ADDED
|
@@ -0,0 +1,157 @@
|
| 1 |
+
import os, sys, json
|
| 2 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
import argparse, numpy as np, torch
|
| 5 |
+
from torch.utils.data import TensorDataset, DataLoader
|
| 6 |
+
from sklearn.model_selection import StratifiedKFold
|
| 7 |
+
from sklearn.metrics import confusion_matrix
|
| 8 |
+
|
| 9 |
+
# Add project-specific imports
|
| 10 |
+
from scripts.preprocess_dataset import preprocess_dataset
|
| 11 |
+
from models.figure2_cnn import Figure2CNN
|
| 12 |
+
from models.resnet_cnn import ResNet1D
|
| 13 |
+
|
| 14 |
+
# Argument parser for CLI usage
|
| 15 |
+
parser = argparse.ArgumentParser(
|
| 16 |
+
description="Run 10-fold CV on Raman data with optional preprocessing.")
|
| 17 |
+
parser.add_argument("--target-len", type=int, default=500)
|
| 18 |
+
parser.add_argument("--baseline", action="store_true")
|
| 19 |
+
parser.add_argument("--smooth", action="store_true")
|
| 20 |
+
parser.add_argument("--normalize", action="store_true")
|
| 21 |
+
parser.add_argument("--batch-size", type=int, default=16)
|
| 22 |
+
parser.add_argument("--epochs", type=int, default=10)
|
| 23 |
+
parser.add_argument("--learning-rate", type=float, default=1e-3)
|
| 24 |
+
parser.add_argument("--model", type=str, default="figure2",
|
| 25 |
+
choices=["figure2", "resnet"])
|
| 26 |
+
args = parser.parse_args()
|
| 27 |
+
|
| 28 |
+
# Constants
|
| 29 |
+
# Raman-only dataset (RDWP)
|
| 30 |
+
DATASET_PATH = 'datasets/rdwp'
|
| 31 |
+
DEVICE = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
|
| 32 |
+
NUM_FOLDS = 10
|
| 33 |
+
|
| 34 |
+
# Ensure output dirs exist
|
| 35 |
+
os.makedirs("outputs", exist_ok=True)
|
| 36 |
+
os.makedirs("outputs/logs", exist_ok=True)
|
| 37 |
+
|
| 38 |
+
print("Preprocessing Configuration:")
|
| 39 |
+
print(f" Reseample to : {args.target_len}")
|
| 40 |
+
print(f" Baseline Correct: {'✅' if args.baseline else '❌'}")
|
| 41 |
+
print(f" Smoothing : {'✅' if args.smooth else '❌'}")
|
| 42 |
+
print(f" Normalization : {'✅' if args.normalize else '❌'}")
|
| 43 |
+
|
| 44 |
+
# Load + Preprocess data
|
| 45 |
+
print("🔄 Loading and preprocessing data ...")
|
| 46 |
+
X, y = preprocess_dataset(
|
| 47 |
+
DATASET_PATH,
|
| 48 |
+
target_len=args.target_len,
|
| 49 |
+
baseline_correction=args.baseline,
|
| 50 |
+
apply_smoothing=args.smooth,
|
| 51 |
+
normalize=args.normalize
|
| 52 |
+
)
|
| 53 |
+
X, y = np.array(X, np.float32), np.array(y, np.int64)
|
| 54 |
+
print(f"✅ Data Loaded: {X.shape[0]} samples, {X.shape[1]} features each.")
|
| 55 |
+
print(f"🔍 Using model: {args.model}")
|
| 56 |
+
|
| 57 |
+
# CV
|
| 58 |
+
skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=42)
|
| 59 |
+
fold_accuracies = []
|
| 60 |
+
all_conf_matrices = []
|
| 61 |
+
|
| 62 |
+
for fold, (train_idx, val_idx) in enumerate(skf.split(X, y), 1):
|
| 63 |
+
print(f"\n🔁 Fold {fold}/{NUM_FOLDS}")
|
| 64 |
+
|
| 65 |
+
X_train, X_val = X[train_idx], X[val_idx]
|
| 66 |
+
y_train, y_val = y[train_idx], y[val_idx]
|
| 67 |
+
|
| 68 |
+
train_loader = DataLoader(
|
| 69 |
+
TensorDataset(torch.tensor(X_train), torch.tensor(y_train)),
|
| 70 |
+
batch_size=args.batch_size, shuffle=True)
|
| 71 |
+
val_loader = DataLoader(
|
| 72 |
+
TensorDataset(torch.tensor(X_val), torch.tensor(y_val)), batch_size=args.batch_size)
|
| 73 |
+
|
| 74 |
+
# Model selection
|
| 75 |
+
model = (Figure2CNN if args.model == "figure2" else ResNet1D)(
|
| 76 |
+
input_length=args.target_len).to(DEVICE)
|
| 77 |
+
|
| 78 |
+
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
|
| 79 |
+
criterion = torch.nn.CrossEntropyLoss()
|
| 80 |
+
|
| 81 |
+
for epoch in range(args.epochs):
|
| 82 |
+
model.train()
|
| 83 |
+
RUNNING_LOSS = 0.0
|
| 84 |
+
for inputs, labels in train_loader:
|
| 85 |
+
inputs = inputs.unsqueeze(1).to(DEVICE)
|
| 86 |
+
labels = labels.to(DEVICE)
|
| 87 |
+
|
| 88 |
+
optimizer.zero_grad()
|
| 89 |
+
loss = criterion(model(inputs), labels)
|
| 90 |
+
loss.backward()
|
| 91 |
+
optimizer.step()
|
| 92 |
+
RUNNING_LOSS += loss.item()
|
| 93 |
+
|
| 94 |
+
# After fold loop (outside the epoch loop), print 1 line:
|
| 95 |
+
print(f"✅ Fold {fold} done. Final loss: {RUNNING_LOSS:.4f}")
|
| 96 |
+
|
| 97 |
+
# Evaluation
|
| 98 |
+
model.eval()
|
| 99 |
+
all_true, all_pred = [], []
|
| 100 |
+
with torch.no_grad():
|
| 101 |
+
for inputs, labels in val_loader:
|
| 102 |
+
inputs = inputs.unsqueeze(1).to(DEVICE)
|
| 103 |
+
labels = labels.to(DEVICE)
|
| 104 |
+
outputs = model(inputs)
|
| 105 |
+
_, predicted = torch.max(outputs, 1)
|
| 106 |
+
all_true.extend(labels.cpu().numpy())
|
| 107 |
+
all_pred.extend(predicted.cpu().numpy())
|
| 108 |
+
|
| 109 |
+
acc = 100 * np.mean(np.array(all_true) == np.array(all_pred))
|
| 110 |
+
fold_accuracies.append(acc)
|
| 111 |
+
all_conf_matrices.append(confusion_matrix(all_true, all_pred))
|
| 112 |
+
print(f"✅ Fold {fold} Accuracy: {acc:.2f}%")
|
| 113 |
+
|
| 114 |
+
# Save model checkpoint **after** final fold
|
| 115 |
+
model_path = f"outputs/{args.model}_model.pth"
|
| 116 |
+
torch.save(model.state_dict(), model_path)
|
| 117 |
+
|
| 118 |
+
# Summary
|
| 119 |
+
mean_acc, std_acc = np.mean(fold_accuracies), np.std(fold_accuracies)
|
| 120 |
+
print("\n📊 Cross-Validation Results:")
|
| 121 |
+
for i, a in enumerate(fold_accuracies, 1):
|
| 122 |
+
print(f"Fold {i}: {a:.2f}%")
|
| 123 |
+
print(f"\n✅ Mean Accuracy: {mean_acc:.2f}% ± {std_acc:.2f}%")
|
| 124 |
+
print(f"✅ Model saved to {model_path}")
|
| 125 |
+
|
| 126 |
+
# Save diagnostics
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def save_diagnostics_log(fold_acc, confs, args_param, output_path):
|
| 130 |
+
fold_metrics = [{"fold": i+1, "accuracy": a,
|
| 131 |
+
"confusion_matrix": c.tolist()}
|
| 132 |
+
for i, (a, c) in enumerate(zip(fold_acc, confs))]
|
| 133 |
+
log = {
|
| 134 |
+
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
| 135 |
+
"preprocessing": {
|
| 136 |
+
"target_len": args_param.target_len,
|
| 137 |
+
"baseline": args_param.baseline,
|
| 138 |
+
"smooth": args_param.smooth,
|
| 139 |
+
"normalize": args_param.normalize,
|
| 140 |
+
},
|
| 141 |
+
"fold_metrics": fold_metrics,
|
| 142 |
+
"overall": {
|
| 143 |
+
"mean_accuracy": float(np.mean(fold_acc)),
|
| 144 |
+
"std_accuracy": float(np.std(fold_acc)),
|
| 145 |
+
"num_folds": len(fold_acc),
|
| 146 |
+
"batch_size": args_param.batch_size,
|
| 147 |
+
"epochs": args_param.epochs,
|
| 148 |
+
"learning_rate": args_param.learning_rate,
|
| 149 |
+
"device": str(DEVICE)
|
| 150 |
+
}
|
| 151 |
+
}
|
| 152 |
+
with open(output_path, "w", encoding="utf-8") as f:
|
| 153 |
+
json.dump(log, f, indent=2)
|
| 154 |
+
print(f"🧠 Diagnostics written to {output_path}")
|
| 155 |
+
|
| 156 |
+
log_path = f"outputs/logs/raman_{args.model}_diagnostics.json"
|
| 157 |
+
save_diagnostics_log(fold_accuracies, all_conf_matrices, args, log_path)
|
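The per-fold confusion matrices written by `save_diagnostics_log` can be aggregated after a run; a small post-hoc sketch (the log path matches the `figure2` default above):

```python
# Post-hoc aggregation of the per-fold confusion matrices logged above.
import json

import numpy as np

with open("outputs/logs/raman_figure2_diagnostics.json", encoding="utf-8") as f:
    log = json.load(f)

total = np.zeros((2, 2), dtype=int)
for fold in log["fold_metrics"]:
    total += np.array(fold["confusion_matrix"])

print("Aggregate confusion matrix (rows = true, cols = predicted):")
print(total)
```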
validate_pipeline.sh
ADDED
|
@@ -0,0 +1,60 @@
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
# ===========================================
|
| 3 |
+
# validate_pipeline.sh — Canonical Smoke Test
|
| 4 |
+
# AI-Driven Polymer Aging Prediction System
|
| 5 |
+
# Requires: conda (or venv) already installed
|
| 6 |
+
# ===========================================
|
| 7 |
+
|
| 8 |
+
set -euo pipefail
|
| 9 |
+
RED='\033[0;31m'
|
| 10 |
+
GRN='\033[0;32m'
|
| 11 |
+
YLW='\033[1;33m'
|
| 12 |
+
NC='\033[0m'
|
| 13 |
+
|
| 14 |
+
die() {
|
| 15 |
+
echo -e "{RED}[FAIL] $1${NC}"
|
| 16 |
+
exit 1
|
| 17 |
+
}
|
| 18 |
+
pass() { echo -e "${GRN}[PASS] $1${NC}"; }
|
| 19 |
+
|
| 20 |
+
echo -e "${YLW}>>> Activating environment...${NC}"
|
| 21 |
+
source "$(conda info --base)/etc/profile.d/conda.sh"
|
| 22 |
+
conda activate polymer_env || die "conda env 'polymer_env' not found"
|
| 23 |
+
|
| 24 |
+
root_dir="$(dirname "$(readlink -f "$0")")"
|
| 25 |
+
cd "$root_dir" || fir "repo root not found"
|
| 26 |
+
|
| 27 |
+
# ---------- Step 1: Preprocessing ----------
|
| 28 |
+
echo -e "${YLW}>>> Step 1: Preprocessing${NC}"
|
| 29 |
+
python scripts/preprocess_dataset.py datasets/rdwp \
|
| 30 |
+
--target-len 500 --baseline --smooth --normalize |
|
| 31 |
+
grep -q "X shape:" || die "preprocess_dataset.py failed"
|
| 32 |
+
pass "Preprocessing"
|
| 33 |
+
|
| 34 |
+
# ---------- Step 2: CV Training (Figure2) ----------
|
| 35 |
+
echo -e "${YLW}>>> Step 2: 10-Fold CV Training${NC}"
|
| 36 |
+
python scripts/train_model.py \
|
| 37 |
+
--target-len 500 --baseline --smooth --normalize \
|
| 38 |
+
--model figure2
|
| 39 |
+
[[ -f outputs/figure2_model.pth ]] || die "model .pth not found"
|
| 40 |
+
[[ -f outputs/logs/raman_figure2_diagnostics.json ]] || die "diagnostics JSON not found"
|
| 41 |
+
pass "Training & artifacts"
|
| 42 |
+
|
| 43 |
+
# ---------- Step 3: Inference ----------
|
| 44 |
+
echo -e "${YLW}>>> Step 3: Inference${NC}"
|
| 45 |
+
python scripts/run_inference.py \
|
| 46 |
+
--target-len 500 \
|
| 47 |
+
--input datasets/rdwp/wea-100.txt \
|
| 48 |
+
--model outputs/figure2_model.pth \
|
| 49 |
+
--output outputs/inference/test_prediction.json
|
| 50 |
+
[[ -f outputs/inference/test_prediction.json ]] || die "inference output missing"
|
| 51 |
+
pass "Inference"
|
| 52 |
+
|
| 53 |
+
# ---------- Step 4: Spectrum Plot ----------
|
| 54 |
+
echo -e "${YLW}>>> Step 4: Plot Spectrum${NC}"
|
| 55 |
+
python scripts/plot_spectrum.py --input datasets/rdwp/sta-10.txt
|
| 56 |
+
[[ $? -eq 0 ]] || die "plot_spectrum.py failed"
|
| 57 |
+
pass "Plotting"
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
echo -e "${GRN}All validation checks passed!${NC}"
|