devjas1 committed on
Commit
ec48f8d
·
1 Parent(s): 7602d7b

(FEAT)[FTIR Processing Enhancements]: Refactor FTIR-specific processing and add atmospheric/water correction methods

Browse files

- Desc:
- Refactored the FTIR-specific processing section in `preprocess_spectrum` to replace placeholder code with actual functionality.
- Added new function `remove_atmospheric_interference` for median-filter-based correction of atmospheric CO2 and H2O interference in FTIR spectra.
- Added new function `remove_water_vapor_bands` for removing water vapor interference bands using linear interpolation across common FTIR water regions.
- Modified logic in `preprocess_spectrum` to allow toggling atmospheric and water correction via the `modality_config` dictionary.
- Added utility function `apply_ftir_specific_processing` to streamline FTIR preprocessing steps.
- Cleaned up comments and improved code clarity in spectrum preprocessing routines.

Files changed (1) hide show
  1. utils/preprocessing.py +69 -10
utils/preprocessing.py CHANGED
@@ -9,8 +9,7 @@ import numpy as np
9
  from numpy.typing import DTypeLike
10
  from scipy.interpolate import interp1d
11
  from scipy.signal import savgol_filter
12
- from scipy.interpolate import interp1d
13
- from typing import Tuple, Literal
14
 
15
  TARGET_LENGTH = 500 # Frozen default per PREPROCESSING_BASELINE
16
 
@@ -32,7 +31,7 @@ MODALITY_PARAMS = {
32
  "baseline_degree": 2,
33
  "smooth_window": 13, # Slightly larger window for FTIR
34
  "smooth_polyorder": 2,
35
- "cosmic_ray_removal": False, # Could add atmospheric correction
36
  "atmospheric_correction": False, # Placeholder for future implementation
37
  },
38
  }
@@ -111,7 +110,7 @@ def validate_spectrum_range(x: np.ndarray, modality: str = "raman") -> bool:
111
  in_range = np.sum((x >= min_range) & (x <= max_range))
112
  total_points = len(x)
113
 
114
- return (in_range / total_points) >= 0.7 # At least 70% should be in range
115
 
116
 
117
  def preprocess_spectrum(
@@ -181,14 +180,12 @@ def preprocess_spectrum(
181
  if do_smooth:
182
  y_rs = smooth_spectrum(y_rs, window_length=window_length, polyorder=polyorder)
183
 
184
- # FTIR-specific processing (placeholder for future enhancements)
185
  if modality == "ftir":
186
  if modality_config.get("atmospheric_correction", False):
187
- # Placeholder for atmospheric correction
188
- pass
189
- if modality_config.get("cosmic_ray_removal", False):
190
- # Placeholder for cosmic ray removal
191
- pass
192
 
193
  if do_normalize:
194
  y_rs = normalize_spectrum(y_rs)
@@ -198,6 +195,68 @@ def preprocess_spectrum(
198
  return x_rs.astype(out_dt, copy=False), y_rs.astype(out_dt, copy=False)
199
 
200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  def get_modality_info(modality: str) -> dict:
202
  """Get processing parameters and validation ranges for a modality."""
203
  if modality not in MODALITY_PARAMS:
 
9
  from numpy.typing import DTypeLike
10
  from scipy.interpolate import interp1d
11
  from scipy.signal import savgol_filter
12
+ from typing import Tuple, Literal, Optional
 
13
 
14
  TARGET_LENGTH = 500 # Frozen default per PREPROCESSING_BASELINE
15
 
 
31
  "baseline_degree": 2,
32
  "smooth_window": 13, # Slightly larger window for FTIR
33
  "smooth_polyorder": 2,
34
+ "cosmic_ray_removal": False,
35
  "atmospheric_correction": False, # Placeholder for future implementation
36
  },
37
  }
 
110
  in_range = np.sum((x >= min_range) & (x <= max_range))
111
  total_points = len(x)
112
 
113
+ return bool((in_range / total_points) >= 0.7) # At least 70% should be in range
114
 
115
 
116
  def preprocess_spectrum(
 
180
  if do_smooth:
181
  y_rs = smooth_spectrum(y_rs, window_length=window_length, polyorder=polyorder)
182
 
183
+ # FTIR-specific processing
184
  if modality == "ftir":
185
  if modality_config.get("atmospheric_correction", False):
186
+ y_rs = remove_atmospheric_interference(y_rs)
187
+ if modality_config.get("water_correction", False):
188
+ y_rs = remove_water_vapor_bands(y_rs, x_rs)
 
 
189
 
190
  if do_normalize:
191
  y_rs = normalize_spectrum(y_rs)
 
195
  return x_rs.astype(out_dt, copy=False), y_rs.astype(out_dt, copy=False)
196
 
197
 
198
def remove_atmospheric_interference(
    y: np.ndarray,
    kernel_size: int = 5,
    alpha: float = 0.7,
) -> np.ndarray:
    """Attenuate sharp, narrow lines by blending with a median-filtered copy.

    This is a basic correction: the spectrum is median filtered and the
    result is blended with the unfiltered signal. It does not use
    reference spectra.

    Args:
        y: Spectrum intensities. Converted to a float array.
        kernel_size: Width (in samples) of the median window. Defaults to 5.
        alpha: Weight given to the unfiltered spectrum in the blend; the
            median-filtered spectrum receives ``1 - alpha``. Defaults to 0.7.

    Returns:
        A new float array with the same shape as ``y`` equal to
        ``alpha * y + (1 - alpha) * median_filtered(y)``.
    """
    # Local import, matching how this function pulled in its filter before.
    from scipy.ndimage import median_filter

    y = np.asarray(y, dtype=float)
    if y.size == 0:
        # Nothing to filter; return an independent empty array.
        return y.copy()

    # mode="nearest" replicates the edge samples instead of padding with
    # zeros, so the first/last kernel_size // 2 points are not dragged
    # toward zero by the filter.
    smoothed = median_filter(y, size=kernel_size, mode="nearest")

    # Blend with the original to preserve peak structure.
    return alpha * y + (1 - alpha) * smoothed
212
+
213
+
214
def remove_water_vapor_bands(y: np.ndarray, x: np.ndarray) -> np.ndarray:
    """Replace samples inside fixed water-vapor bands with a straight line.

    For each band, the samples whose ``x`` lies inside the band are replaced
    by a linear interpolation between the sample just before the band and
    the sample just after it. The interpolation is evaluated at the actual
    ``x`` positions, so a spectrum that is already linear across the band is
    left unchanged, and non-uniform or descending ``x`` axes are handled.

    A band is left untouched when it contains two or fewer samples, or when
    it touches either end of the spectrum (no anchor sample on that side).

    Args:
        y: Spectrum intensities. Converted to a float array.
        x: Axis values for ``y`` (the band limits below are compared against
            these directly). Assumed monotonic so that each band is one
            contiguous run of samples.

    Returns:
        A new float array; ``y`` itself is not modified.
    """
    y = np.asarray(y, dtype=float)
    x = np.asarray(x, dtype=float)

    # Common water vapor regions in FTIR (cm^-1)
    water_regions = [(3500, 3800), (1300, 1800)]

    corrected_y = y.copy()

    for low, high in water_regions:
        mask = (x >= low) & (x <= high)
        indices = np.flatnonzero(mask)
        if len(indices) <= 2:
            continue

        start_idx, end_idx = indices[0], indices[-1]
        if start_idx == 0 or end_idx == len(y) - 1:
            # No sample outside the band on one side to anchor the line.
            continue

        left, right = start_idx - 1, end_idx + 1
        span = x[right] - x[left]
        if span == 0:
            # Degenerate axis (both anchors at the same x); cannot interpolate.
            continue

        # Fractional position of each in-band sample between the two anchors.
        # Using x itself (not an even linspace) keeps the anchors on the
        # out-of-band neighbours and respects the real sample spacing.
        frac = (x[mask] - x[left]) / span
        corrected_y[mask] = y[left] + frac * (y[right] - y[left])

    return corrected_y
240
+
241
+
242
def apply_ftir_specific_processing(
    x: np.ndarray,
    y: np.ndarray,
    atmospheric_correction: bool = False,
    water_correction: bool = False,
) -> tuple[np.ndarray, np.ndarray]:
    """Apply the optional FTIR-specific correction steps to a spectrum.

    Args:
        x: Axis values for the spectrum. Returned as an array; an ndarray
            input is passed through without copying.
        y: Spectrum intensities. Always copied into a new float array, so the
            caller's data is never modified, and list/tuple input is accepted.
        atmospheric_correction: When true, run
            ``remove_atmospheric_interference`` on the intensities.
        water_correction: When true, run ``remove_water_vapor_bands`` on the
            intensities (after the atmospheric step, if both are enabled).

    Returns:
        ``(x, processed_y)`` where ``processed_y`` is a float array.
    """
    x = np.asarray(x)
    # np.array copies by default, giving an independent float working buffer
    # regardless of whether y arrived as an ndarray, list or tuple.
    processed_y = np.array(y, dtype=float)

    if atmospheric_correction:
        processed_y = remove_atmospheric_interference(processed_y)

    if water_correction:
        processed_y = remove_water_vapor_bands(processed_y, x)

    return x, processed_y
258
+
259
+
260
  def get_modality_info(modality: str) -> dict:
261
  """Get processing parameters and validation ranges for a modality."""
262
  if modality not in MODALITY_PARAMS: