devjas1 committed on
Commit
c024e8f
·
1 Parent(s): 68f2a01

Adds enhanced analysis functionality for spectroscopy

Browse files

Introduces a new interface for advanced multi-modal spectroscopy analysis using modern machine learning techniques.

Implements features for file upload, data quality assessment, intelligent preprocessing recommendations, and transparent AI analysis with explanations and hypothesis generation.

Enhances user experience with comprehensive data provenance tracking and visualization of analysis results.

Files changed (1) hide show
  1. pages/Enhanced_Analysis.py +433 -0
pages/Enhanced_Analysis.py ADDED
@@ -0,0 +1,433 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Enhanced Analysis Page for POLYMEROS
3
+ Advanced multi-modal spectroscopy analysis with modern ML architecture
4
+ """
5
+
6
+ import streamlit as st
7
+ import torch
8
+ import numpy as np
9
+ import matplotlib.pyplot as plt
10
+ from pathlib import Path
11
+ import io
12
+ from PIL import Image
13
+
14
+ # Import POLYMEROS components
15
+ import sys
16
+ import os
17
+
18
+ sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "modules"))
19
+
20
+ from modules.transparent_ai import TransparentAIEngine, PredictionExplanation
21
+ from modules.enhanced_data import (
22
+ EnhancedDataManager,
23
+ ContextualSpectrum,
24
+ SpectralMetadata,
25
+ )
26
+ from modules.advanced_spectroscopy import MultiModalSpectroscopyEngine
27
+ from modules.modern_ml_architecture import (
28
+ ModernMLPipeline,
29
+ )
30
+ from modules.enhanced_data_pipeline import EnhancedDataPipeline
31
+ from core_logic import load_model, parse_spectrum_data
32
+ from config import MODEL_CONFIG, TARGET_LEN
33
+
34
+ # Removed unused preprocess_spectrum import
35
+
36
+
37
def init_enhanced_analysis():
    """Seed ``st.session_state`` with the enhanced-analysis components.

    Every entry is created only when its key is missing, so objects that are
    already stored in the session are left untouched.
    """
    state = st.session_state

    if "data_manager" not in state:
        state.data_manager = EnhancedDataManager()

    if "spectroscopy_engine" not in state:
        state.spectroscopy_engine = MultiModalSpectroscopyEngine()

    if "ml_pipeline" not in state:
        # The pipeline is stored before initialize_models() is called on it.
        pipeline = ModernMLPipeline()
        state.ml_pipeline = pipeline
        pipeline.initialize_models()

    if "data_pipeline" not in state:
        state.data_pipeline = EnhancedDataPipeline()

    # Slots that start empty and are filled in by later steps of the page.
    for key in ("transparent_ai", "current_model", "analysis_results"):
        if key not in state:
            state[key] = None
60
+
61
+
62
def load_enhanced_model(model_name: str):
    """Load ``model_name`` and attach a transparent-AI engine to the session.

    Returns:
        True when ``load_model`` returned a model and both ``current_model``
        and ``transparent_ai`` were stored in ``st.session_state``; False when
        it returned ``None`` or when an exception was raised (the exception is
        reported through ``st.error``).
    """
    try:
        loaded = load_model(model_name)
        if loaded is None:
            return False
        st.session_state.current_model = loaded
        st.session_state.transparent_ai = TransparentAIEngine(loaded)
    except Exception as exc:
        st.error(f"Error loading model: {exc}")
        return False
    return True
74
+
75
+
76
def render_enhanced_file_upload():
    """Render the upload, quality-check and preprocessing workflow.

    Steps, all driven by Streamlit widgets:

    1. Accept a ``.txt`` spectrum and parse it with ``parse_spectrum_data``.
    2. Wrap it in a ``ContextualSpectrum`` and show the quality score obtained
       from the session's data manager.
    3. Show the manager's preprocessing recommendations next to manual
       override checkboxes.
    4. On "Process and Analyze", run ``preprocess_with_tracking``, store the
       result in ``st.session_state.processed_spectrum`` and list the
       provenance records.

    Any exception raised along the way is reported with ``st.error``.
    """
    st.header("📁 Enhanced Spectrum Analysis")

    uploaded_file = st.file_uploader(
        "Upload spectrum file (.txt)",
        type=["txt"],
        help="Upload a Raman or FTIR spectrum in text format",
    )

    if uploaded_file is None:
        return

    # Broad catch is deliberate: this is the UI boundary and the user should
    # see a message rather than a traceback.
    try:
        # getvalue() returns the full buffer regardless of the current read
        # position, so the content is still available when the script reruns
        # after a widget interaction. "utf-8-sig" additionally drops a
        # leading byte-order mark that would otherwise end up glued to the
        # first number of the file.
        content = uploaded_file.getvalue().decode("utf-8-sig")
        x_data, y_data = parse_spectrum_data(content)

        # Create enhanced spectrum with metadata
        metadata = SpectralMetadata(
            filename=uploaded_file.name,
            instrument_type="Raman",  # Default, could be detected from filename
            data_quality_score=None,
        )
        spectrum = ContextualSpectrum(x_data, y_data, metadata)

        # Get data quality assessment
        data_manager = st.session_state.data_manager
        quality_score = data_manager._assess_data_quality(y_data)
        spectrum.metadata.data_quality_score = quality_score

        # Display quality assessment
        if quality_score > 0.7:
            quality_color = "🟢"
        elif quality_score > 0.4:
            quality_color = "🟡"
        else:
            quality_color = "🔴"

        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Data Points", len(x_data))
        with col2:
            st.metric("Quality Score", f"{quality_score:.2f}")
        with col3:
            st.metric("Quality", f"{quality_color}")

        # Get preprocessing recommendations
        recommendations = data_manager.get_preprocessing_recommendations(spectrum)

        st.subheader("Intelligent Preprocessing Recommendations")
        rec_col1, rec_col2 = st.columns(2)

        with rec_col1:
            st.write("**Recommended settings:**")
            for param, value in recommendations.items():
                st.write(f"• {param}: {value}")

        with rec_col2:
            st.write("**Manual override:**")
            do_baseline = st.checkbox(
                "Baseline correction",
                value=recommendations.get("do_baseline", True),
            )
            do_smooth = st.checkbox(
                "Smoothing", value=recommendations.get("do_smooth", True)
            )
            do_normalize = st.checkbox(
                "Normalization", value=recommendations.get("do_normalize", True)
            )

        # Apply preprocessing with tracking
        preprocessing_params = {
            "do_baseline": do_baseline,
            "do_smooth": do_smooth,
            "do_normalize": do_normalize,
            "target_len": TARGET_LEN,
        }

        if st.button("Process and Analyze"):
            with st.spinner("Processing spectrum with provenance tracking..."):
                # Apply preprocessing with full tracking
                processed_spectrum = data_manager.preprocess_with_tracking(
                    spectrum, **preprocessing_params
                )

            # Store processed spectrum
            st.session_state.processed_spectrum = processed_spectrum
            # Results computed for a previously processed spectrum no longer
            # describe the current one, so drop them.
            st.session_state.analysis_results = None
            st.success("Spectrum processed with full provenance tracking!")

            # Display provenance information
            st.subheader("Processing Provenance")
            for record in processed_spectrum.provenance:
                with st.expander(f"Operation: {record.operation}"):
                    st.write(f"**Timestamp:** {record.timestamp}")
                    st.write(f"**Parameters:** {record.parameters}")
                    st.write(f"**Input hash:** {record.input_hash}")
                    st.write(f"**Output hash:** {record.output_hash}")

    except Exception as exc:
        st.error(f"Error processing file: {exc}")
174
+
175
+
176
def render_transparent_analysis():
    """Render model selection and the explainable prediction workflow.

    Requires ``st.session_state.processed_spectrum``; without it only an info
    message is shown. A model is loaded automatically while none is stored in
    the session, and again whenever "Load Model" is clicked. "Run Transparent
    Analysis" feeds the processed intensities to the transparent-AI engine,
    stores the explanation and hypotheses in
    ``st.session_state.analysis_results`` and renders them.
    """
    if "processed_spectrum" not in st.session_state:
        st.info("Please upload and process a spectrum first.")
        return

    st.header("🧠 Transparent AI Analysis")

    # Model selection
    model_names = list(MODEL_CONFIG.keys())
    selected_model = st.selectbox("Select AI model:", model_names)

    # Evaluate the button before the `or` so the widget is rendered on every
    # run; inside the condition it was skipped while no model was loaded.
    reload_requested = st.button("Load Model")
    if st.session_state.current_model is None or reload_requested:
        with st.spinner(f"Loading {selected_model} model..."):
            if load_enhanced_model(selected_model):
                st.success(f"Model {selected_model} loaded successfully!")
            else:
                st.error("Failed to load model")
                return

    if st.session_state.transparent_ai is None:
        return

    spectrum = st.session_state.processed_spectrum

    if st.button("Run Transparent Analysis"):
        with st.spinner("Running comprehensive analysis..."):
            # Prepare input tensor; unsqueeze(0) adds a leading dimension
            y_processed = spectrum.y_data
            x_input = torch.tensor(y_processed, dtype=torch.float32).unsqueeze(0)

            # Get transparent explanation
            explanation = st.session_state.transparent_ai.predict_with_explanation(
                x_input, wavenumbers=spectrum.x_data
            )

            # Generate hypotheses
            hypotheses = st.session_state.transparent_ai.generate_hypotheses(
                explanation
            )

            # Store results together with the spectrum they were computed
            # for, so a later run can tell whether they are still current.
            st.session_state.analysis_results = {
                "explanation": explanation,
                "hypotheses": hypotheses,
                "spectrum": spectrum,
            }

        # Display results
        render_analysis_results(explanation, hypotheses)
        return

    # The button is only True on the run right after the click. Re-show the
    # stored results on later runs, but only when they belong to the spectrum
    # that is currently loaded.
    previous = st.session_state.analysis_results
    if previous and previous.get("spectrum") is spectrum:
        render_analysis_results(previous["explanation"], previous["hypotheses"])
223
+
224
+
225
def render_analysis_results(explanation: PredictionExplanation, hypotheses: list):
    """Render prediction, probabilities, reasoning and hypotheses.

    Args:
        explanation: Result object whose ``prediction``, ``confidence``,
            ``confidence_level``, ``probabilities``, ``reasoning_chain``,
            ``feature_importance``, ``uncertainty_sources`` and
            ``confidence_intervals`` attributes are displayed.
        hypotheses: Objects exposing ``statement``, ``confidence``,
            ``supporting_evidence``, ``testable_predictions`` and
            ``suggested_experiments``; nothing is shown when empty.
    """
    st.subheader("🎯 Prediction Results")

    # Main prediction. The two known names are used where they apply; any
    # further class index gets a generic label so that the label list always
    # matches the number of probabilities.
    class_names = ["Stable", "Weathered"]
    n_classes = len(explanation.probabilities)
    labels = [
        class_names[i] if i < len(class_names) else f"Class {i}"
        for i in range(n_classes)
    ]
    predicted_index = explanation.prediction
    if 0 <= predicted_index < len(class_names):
        predicted_class = class_names[predicted_index]
    else:
        predicted_class = f"Class {predicted_index}"

    if explanation.confidence_level == "HIGH":
        confidence_emoji = "🟢"
    elif explanation.confidence_level == "MEDIUM":
        confidence_emoji = "🟡"
    else:
        confidence_emoji = "🔴"

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Prediction", predicted_class)
    with col2:
        st.metric("Confidence", f"{explanation.confidence:.3f}")
    with col3:
        st.metric("Level", f"{confidence_emoji} {explanation.confidence_level}")

    # Probability distribution
    st.subheader("📊 Probability Distribution")

    fig, ax = plt.subplots(figsize=(8, 5))
    bars = ax.bar(labels, explanation.probabilities)
    ax.set_ylabel("Probability")
    ax.set_title("Class Probabilities")
    ax.set_ylim(0, 1)

    # Highlight the predicted class, mute the others
    for i, bar in enumerate(bars):
        bar.set_color("steelblue" if i == predicted_index else "lightgray")

    st.pyplot(fig)
    # pyplot keeps every figure registered until it is closed; without this
    # each script rerun would leave another figure in memory.
    plt.close(fig)

    # Reasoning chain
    st.subheader("🔍 AI Reasoning Chain")
    for step, reasoning in enumerate(explanation.reasoning_chain, start=1):
        st.write(f"{step}. {reasoning}")

    # Feature importance
    if explanation.feature_importance:
        st.subheader("🎯 Feature Importance Analysis")

        features = list(explanation.feature_importance.keys())
        importances = list(explanation.feature_importance.values())

        fig, ax = plt.subplots(figsize=(10, 6))
        bars = ax.barh(features, importances)
        ax.set_xlabel("Importance Score")
        ax.set_title("Spectral Region Importance")

        # Color bars by absolute importance
        for bar, importance in zip(bars, importances):
            magnitude = abs(importance)
            if magnitude > 0.5:
                bar.set_color("red")
            elif magnitude > 0.3:
                bar.set_color("orange")
            else:
                bar.set_color("lightblue")

        # Lay out this figure explicitly instead of the global current one
        fig.tight_layout()
        st.pyplot(fig)
        plt.close(fig)

    # Uncertainty analysis
    st.subheader("🤔 Uncertainty Analysis")
    for source in explanation.uncertainty_sources:
        st.write(f"• {source}")

    # Confidence intervals
    if explanation.confidence_intervals:
        st.subheader("📈 Confidence Intervals")
        for class_name, (lower, upper) in explanation.confidence_intervals.items():
            st.write(f"**{class_name}:** [{lower:.3f}, {upper:.3f}]")

    # AI-generated hypotheses
    if hypotheses:
        st.subheader("🧪 AI-Generated Scientific Hypotheses")

        for number, hypothesis in enumerate(hypotheses, start=1):
            with st.expander(f"Hypothesis {number}: {hypothesis.statement}"):
                st.write(f"**Confidence:** {hypothesis.confidence:.3f}")

                st.write("**Supporting Evidence:**")
                for evidence in hypothesis.supporting_evidence:
                    st.write(f"• {evidence}")

                st.write("**Testable Predictions:**")
                for prediction in hypothesis.testable_predictions:
                    st.write(f"• {prediction}")

                st.write("**Suggested Experiments:**")
                for experiment in hypothesis.suggested_experiments:
                    st.write(f"• {experiment}")
325
+
326
+
327
def render_data_provenance():
    """Render metadata, processing history and quality metrics.

    Reads ``st.session_state.processed_spectrum``; when it is missing only an
    info message is shown. Displays the spectrum's metadata fields, one
    expander per provenance record, and the ``quality_metrics`` mapping when
    the spectrum has that attribute.
    """
    if "processed_spectrum" not in st.session_state:
        st.info("No processed spectrum available.")
        return

    st.header("📋 Data Provenance & Quality")

    spectrum = st.session_state.processed_spectrum

    # Metadata display
    st.subheader("📄 Spectrum Metadata")
    metadata = spectrum.metadata

    # The score may be None (the metadata is created that way before the
    # assessment fills it in), and formatting None with ".3f" raises.
    score = metadata.data_quality_score
    score_text = f"{score:.3f}" if score is not None else "N/A"

    col1, col2 = st.columns(2)
    with col1:
        st.write(f"**Filename:** {metadata.filename}")
        st.write(f"**Instrument:** {metadata.instrument_type}")
        st.write(f"**Quality Score:** {score_text}")

    with col2:
        if metadata.laser_wavelength:
            st.write(f"**Laser Wavelength:** {metadata.laser_wavelength} nm")
        if metadata.acquisition_date:
            st.write(f"**Acquisition Date:** {metadata.acquisition_date}")
        st.write(f"**Data Hash:** {spectrum.data_hash}")

    # Provenance timeline
    st.subheader("🕒 Processing Timeline")

    if spectrum.provenance:
        for step, record in enumerate(spectrum.provenance, start=1):
            # str() keeps the slice working for non-string timestamps; the
            # first 19 characters are shown in the expander title.
            stamp = str(record.timestamp)[:19]
            with st.expander(f"Step {step}: {record.operation} ({stamp})"):
                st.write(f"**Operation:** {record.operation}")
                st.write(f"**Operator:** {record.operator}")
                st.write("**Parameters:**")
                for param, value in record.parameters.items():
                    st.write(f" - {param}: {value}")
                st.write(f"**Input Hash:** {record.input_hash}")
                st.write(f"**Output Hash:** {record.output_hash}")
    else:
        st.info("No processing operations recorded yet.")

    # Quality assessment details
    st.subheader("🔍 Quality Assessment Details")

    if hasattr(spectrum, "quality_metrics"):
        for metric, value in spectrum.quality_metrics.items():
            st.write(f"**{metric}:** {value}")
    else:
        st.info("Run quality assessment to see detailed metrics.")
381
+
382
+
383
def main():
    """Entry point of the enhanced analysis page.

    Configures the page, initialises the session state, lets the user pick
    one of three analysis modes in the sidebar, renders that mode, and shows
    a feature list and a status line in the sidebar.
    """
    st.set_page_config(
        page_title="POLYMEROS Enhanced Analysis", page_icon="🔬", layout="wide"
    )

    st.title("🔬 POLYMEROS Enhanced Analysis")
    st.markdown("**Transparent AI with Explainability and Hypothesis Generation**")

    # Initialize session
    init_enhanced_analysis()

    # Sidebar navigation: label -> renderer, in display order
    modes = {
        "Spectrum Upload & Processing": render_enhanced_file_upload,
        "Transparent AI Analysis": render_transparent_analysis,
        "Data Provenance & Quality": render_data_provenance,
    }
    st.sidebar.title("🧪 Analysis Tools")
    analysis_mode = st.sidebar.selectbox("Select analysis mode:", list(modes))

    # Render selected mode
    renderer = modes.get(analysis_mode)
    if renderer is not None:
        renderer()

    # Additional information
    st.sidebar.markdown("---")
    st.sidebar.markdown("**Enhanced Features:**")
    for feature in (
        "Complete provenance tracking",
        "Intelligent preprocessing",
        "Uncertainty quantification",
        "AI hypothesis generation",
        "Explainable predictions",
    ):
        st.sidebar.markdown(f"• {feature}")

    # Display current analysis status (evaluated after the mode has rendered,
    # so it reflects anything that mode just stored in the session)
    if st.session_state.analysis_results:
        st.sidebar.success("✅ Analysis completed")
    elif "processed_spectrum" in st.session_state:
        st.sidebar.info("📊 Spectrum processed")
    else:
        st.sidebar.info("📁 Ready for upload")
430
+
431
+
432
+ if __name__ == "__main__":
433
+ main()