Spaces:
Sleeping
(FEAT)[FTIR Processing Enhancements]: Refactor FTIR-specific processing and add atmospheric/water correction methods
Browse files
- Desc:
- Refactored the FTIR-specific processing section in `preprocess_spectrum` to replace placeholder code with actual functionality.
- Added new function `remove_atmospheric_interference` for median-filter-based correction of atmospheric CO2 and H2O interference in FTIR spectra.
- Added new function `remove_water_vapor_bands` for removing water vapor interference bands using linear interpolation across common FTIR water regions.
- Modified logic in `preprocess_spectrum` to allow toggling atmospheric and water correction via the `modality_config` dictionary.
- Added utility function `apply_ftir_specific_processing` to streamline FTIR preprocessing steps.
- Cleaned up comments and improved code clarity in spectrum preprocessing routines.
- utils/preprocessing.py +69 -10
|
@@ -9,8 +9,7 @@ import numpy as np
|
|
| 9 |
from numpy.typing import DTypeLike
|
| 10 |
from scipy.interpolate import interp1d
|
| 11 |
from scipy.signal import savgol_filter
|
| 12 |
-
from
|
| 13 |
-
from typing import Tuple, Literal
|
| 14 |
|
| 15 |
TARGET_LENGTH = 500 # Frozen default per PREPROCESSING_BASELINE
|
| 16 |
|
|
@@ -32,7 +31,7 @@ MODALITY_PARAMS = {
|
|
| 32 |
"baseline_degree": 2,
|
| 33 |
"smooth_window": 13, # Slightly larger window for FTIR
|
| 34 |
"smooth_polyorder": 2,
|
| 35 |
-
"cosmic_ray_removal": False,
|
| 36 |
"atmospheric_correction": False, # Placeholder for future implementation
|
| 37 |
},
|
| 38 |
}
|
|
@@ -111,7 +110,7 @@ def validate_spectrum_range(x: np.ndarray, modality: str = "raman") -> bool:
|
|
| 111 |
in_range = np.sum((x >= min_range) & (x <= max_range))
|
| 112 |
total_points = len(x)
|
| 113 |
|
| 114 |
-
return (in_range / total_points) >= 0.7 # At least 70% should be in range
|
| 115 |
|
| 116 |
|
| 117 |
def preprocess_spectrum(
|
|
@@ -181,14 +180,12 @@ def preprocess_spectrum(
|
|
| 181 |
if do_smooth:
|
| 182 |
y_rs = smooth_spectrum(y_rs, window_length=window_length, polyorder=polyorder)
|
| 183 |
|
| 184 |
-
# FTIR-specific processing
|
| 185 |
if modality == "ftir":
|
| 186 |
if modality_config.get("atmospheric_correction", False):
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
# Placeholder for cosmic ray removal
|
| 191 |
-
pass
|
| 192 |
|
| 193 |
if do_normalize:
|
| 194 |
y_rs = normalize_spectrum(y_rs)
|
|
@@ -198,6 +195,68 @@ def preprocess_spectrum(
|
|
| 198 |
return x_rs.astype(out_dt, copy=False), y_rs.astype(out_dt, copy=False)
|
| 199 |
|
| 200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
def get_modality_info(modality: str) -> dict:
|
| 202 |
"""Get processing parameters and validation ranges for a modality."""
|
| 203 |
if modality not in MODALITY_PARAMS:
|
|
|
|
| 9 |
from numpy.typing import DTypeLike
|
| 10 |
from scipy.interpolate import interp1d
|
| 11 |
from scipy.signal import savgol_filter
|
| 12 |
+
from typing import Tuple, Literal, Optional
|
|
|
|
| 13 |
|
| 14 |
TARGET_LENGTH = 500 # Frozen default per PREPROCESSING_BASELINE
|
| 15 |
|
|
|
|
| 31 |
"baseline_degree": 2,
|
| 32 |
"smooth_window": 13, # Slightly larger window for FTIR
|
| 33 |
"smooth_polyorder": 2,
|
| 34 |
+
"cosmic_ray_removal": False,
|
| 35 |
"atmospheric_correction": False, # Placeholder for future implementation
|
| 36 |
},
|
| 37 |
}
|
|
|
|
| 110 |
in_range = np.sum((x >= min_range) & (x <= max_range))
|
| 111 |
total_points = len(x)
|
| 112 |
|
| 113 |
+
return bool((in_range / total_points) >= 0.7) # At least 70% should be in range
|
| 114 |
|
| 115 |
|
| 116 |
def preprocess_spectrum(
|
|
|
|
| 180 |
if do_smooth:
|
| 181 |
y_rs = smooth_spectrum(y_rs, window_length=window_length, polyorder=polyorder)
|
| 182 |
|
| 183 |
+
# FTIR-specific processing
|
| 184 |
if modality == "ftir":
|
| 185 |
if modality_config.get("atmospheric_correction", False):
|
| 186 |
+
y_rs = remove_atmospheric_interference(y_rs)
|
| 187 |
+
if modality_config.get("water_correction", False):
|
| 188 |
+
y_rs = remove_water_vapor_bands(y_rs, x_rs)
|
|
|
|
|
|
|
| 189 |
|
| 190 |
if do_normalize:
|
| 191 |
y_rs = normalize_spectrum(y_rs)
|
|
|
|
| 195 |
return x_rs.astype(out_dt, copy=False), y_rs.astype(out_dt, copy=False)
|
| 196 |
|
| 197 |
|
| 198 |
+
def remove_atmospheric_interference(y: np.ndarray) -> np.ndarray:
    """Attenuate sharp atmospheric (CO2 / H2O) lines in an FTIR spectrum.

    The spectrum is passed through a 5-point median filter and the filtered
    trace is blended back into the input: 70 % original, 30 % filtered.
    This is a basic correction; it does not use reference spectra.

    Args:
        y: Intensity values; converted to a float array before filtering.

    Returns:
        A new float array holding the blended spectrum.
    """
    from scipy.signal import medfilt

    spectrum = np.asarray(y, dtype=float)

    # Share of the unfiltered signal kept in the output, so genuine peak
    # structure is preserved while narrow lines are damped.
    original_weight = 0.7

    median_smoothed = medfilt(spectrum, kernel_size=5)
    return original_weight * spectrum + (1 - original_weight) * median_smoothed
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
def remove_water_vapor_bands(y: np.ndarray, x: np.ndarray) -> np.ndarray:
    """Replace water-vapor regions of an FTIR spectrum with a straight line.

    For each hard-coded water region, the samples whose x value falls inside
    the region are overwritten with values linearly interpolated (in x)
    between the sample just before the region and the sample just after it.
    A spectrum that is already linear across a region is therefore returned
    unchanged there.

    A region is left untouched when it contains two or fewer samples, or when
    it touches either end of the array (there is no bounding sample to
    interpolate from).

    Args:
        y: Intensity values; converted to a float array.
        x: Axis values matching ``y`` element for element; converted to a
            float array. May be ascending or descending.
            NOTE(review): assumed monotonic, so that each region maps to one
            contiguous run of samples - confirm against callers.

    Returns:
        A new float array; the input ``y`` is not modified.
    """
    y = np.asarray(y, dtype=float)
    x = np.asarray(x, dtype=float)

    # Common water vapor regions in FTIR (cm^-1)
    water_regions = [(3500, 3800), (1300, 1800)]

    corrected_y = y.copy()

    for low, high in water_regions:
        indices = np.flatnonzero((x >= low) & (x <= high))
        if len(indices) <= 2:
            continue

        start_idx, end_idx = indices[0], indices[-1]
        if start_idx <= 0 or end_idx >= len(y) - 1:
            # The region runs to the edge of the spectrum: no anchor sample
            # exists on at least one side, so leave the data as-is.
            continue

        left, right = start_idx - 1, end_idx + 1
        span = x[right] - x[left]
        if span == 0:
            # Degenerate axis (identical anchor positions); cannot interpolate.
            continue

        # Interpolate against the real x positions, anchored on the samples
        # just outside the region. Placing the anchor values on the first and
        # last in-region samples would create a flat step at each edge.
        fraction = (x[indices] - x[left]) / span
        corrected_y[indices] = y[left] + fraction * (y[right] - y[left])

    return corrected_y
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
def apply_ftir_specific_processing(
    x: np.ndarray,
    y: np.ndarray,
    atmospheric_correction: bool = False,
    water_correction: bool = False,
) -> tuple[np.ndarray, np.ndarray]:
    """Run the optional FTIR-only correction steps on a spectrum.

    Works on a copy of ``y``. The enabled steps run in a fixed order:
    atmospheric interference removal first, then water-vapor band removal.

    Args:
        x: Axis values; returned as the same object, unmodified.
        y: Intensity values; copied via ``y.copy()`` before any step runs.
        atmospheric_correction: Apply ``remove_atmospheric_interference``.
        water_correction: Apply ``remove_water_vapor_bands``.

    Returns:
        ``(x, processed_y)`` where ``processed_y`` is the corrected copy of
        ``y`` (an unmodified copy when both flags are off).
    """
    result = y.copy()

    # (enabled?, step) pairs, listed in the order they must run.
    steps = (
        (atmospheric_correction, lambda s: remove_atmospheric_interference(s)),
        (water_correction, lambda s: remove_water_vapor_bands(s, x)),
    )
    for enabled, step in steps:
        if enabled:
            result = step(result)

    return x, result
|
| 258 |
+
|
| 259 |
+
|
| 260 |
def get_modality_info(modality: str) -> dict:
|
| 261 |
"""Get processing parameters and validation ranges for a modality."""
|
| 262 |
if modality not in MODALITY_PARAMS:
|