devjas1 committed on
Commit
2a2cf15
·
1 Parent(s): 81ec5ec

(FEAT)[Add validate_spectrum_modality Utility]:

Browse files

- New function to check if spectrum data matches selected modality (raman/ftir).
- Validates wavenumber ranges, coverage, and typical modality properties.
- Returns validation status and details of any mismatches.

Files changed (1) hide show
  1. utils/preprocessing.py +63 -0
utils/preprocessing.py CHANGED
@@ -113,6 +113,69 @@ def validate_spectrum_range(x: np.ndarray, modality: str = "raman") -> bool:
113
  return bool((in_range / total_points) >= 0.7) # At least 70% should be in range
114
 
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  def preprocess_spectrum(
117
  x: np.ndarray,
118
  y: np.ndarray,
 
113
  return bool((in_range / total_points) >= 0.7) # At least 70% should be in range
114
 
115
 
116
def validate_spectrum_modality(
    x_data: np.ndarray, y_data: np.ndarray, selected_modality: str
) -> Tuple[bool, list[str]]:
    """
    Validate that spectrum characteristics match the selected modality.

    The wavenumber axis is compared against the (min, max) pair stored in
    ``MODALITY_RANGES`` for the selected modality, and every mismatch is
    collected as a human-readable message rather than raised.

    Args:
        x_data: Wavenumber array (cm⁻¹). Anything ``np.asarray`` accepts.
        y_data: Intensity array. Accepted for interface compatibility; the
            current checks only inspect the wavenumber axis.
        selected_modality: Selected modality key; must be present in
            ``MODALITY_RANGES`` (e.g. 'raman' or 'ftir').

    Returns:
        Tuple of (is_valid, list_of_issues). ``is_valid`` is True only when
        ``list_of_issues`` is empty.
    """
    x_data = np.asarray(x_data)
    issues: list[str] = []

    # np.min / np.max raise ValueError on an empty array, so an empty spectrum
    # is reported as a validation issue instead of crashing the caller.
    if x_data.size == 0:
        issues.append("Spectrum contains no data points")
        return False, issues

    if selected_modality not in MODALITY_RANGES:
        issues.append(f"Unknown modality: {selected_modality}")
        return False, issues

    expected_min, expected_max = MODALITY_RANGES[selected_modality]
    actual_min, actual_max = np.min(x_data), np.max(x_data)

    # Check wavenumber range
    if actual_min < expected_min * 0.8:  # Allow 20% tolerance
        issues.append(
            f"Minimum wavenumber ({actual_min:.0f} cm⁻¹) is below typical {selected_modality.upper()} range (>{expected_min} cm⁻¹)"
        )

    if actual_max > expected_max * 1.2:  # Allow 20% tolerance
        issues.append(
            f"Maximum wavenumber ({actual_max:.0f} cm⁻¹) is above typical {selected_modality.upper()} range (<{expected_max} cm⁻¹)"
        )

    # Check for reasonable data range coverage
    data_range = actual_max - actual_min
    expected_range = expected_max - expected_min
    if data_range < expected_range * 0.3:  # Should cover at least 30% of expected range
        issues.append(
            f"Data range ({data_range:.0f} cm⁻¹) seems narrow for {selected_modality.upper()} spectroscopy"
        )

    # FTIR-specific check: data starting above 1000 cm⁻¹ misses the
    # fingerprint region.
    if selected_modality == "ftir" and actual_min > 1000:
        issues.append(
            "FTIR data should typically include fingerprint region (400-1500 cm⁻¹)"
        )

    # Raman-specific check: data ending below 1000 cm⁻¹ is unusually short.
    if selected_modality == "raman" and actual_max < 1000:
        issues.append(
            "Raman data typically extends to higher wavenumbers (>1000 cm⁻¹)"
        )

    return not issues, issues
177
+
178
+
179
  def preprocess_spectrum(
180
  x: np.ndarray,
181
  y: np.ndarray,