Spaces:
Sleeping
Sleeping
devjas1
committed on
Commit
·
2a2cf15
1
Parent(s):
81ec5ec
(FEAT)[Add validate_spectrum_modality Utility]:
Browse files
- New function to check if spectrum data matches selected modality (raman/ftir).
- Validates wavenumber ranges, coverage, and typical modality properties.
- Returns validation status and details of any mismatches.
- utils/preprocessing.py +63 -0
utils/preprocessing.py
CHANGED
@@ -113,6 +113,69 @@ def validate_spectrum_range(x: np.ndarray, modality: str = "raman") -> bool:
|
|
113 |
return bool((in_range / total_points) >= 0.7) # At least 70% should be in range
|
114 |
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
def preprocess_spectrum(
|
117 |
x: np.ndarray,
|
118 |
y: np.ndarray,
|
|
|
113 |
return bool((in_range / total_points) >= 0.7) # At least 70% should be in range
|
114 |
|
115 |
|
116 |
+
def validate_spectrum_modality(
    x_data: np.ndarray, y_data: np.ndarray, selected_modality: str
) -> Tuple[bool, list[str]]:
    """
    Validate that spectrum characteristics match the selected modality.

    The checks look only at the wavenumber axis: its extremes are compared
    against the range registered for the modality in ``MODALITY_RANGES``
    (with a 20% tolerance on each side), the covered span must be at least
    30% of the expected span, and one modality-specific heuristic is applied
    for 'ftir' and 'raman' respectively.

    Args:
        x_data: Wavenumber array (cm⁻¹). Non-finite entries (NaN/inf) are
            ignored when computing the covered range.
        y_data: Intensity array. Accepted for interface compatibility; none
            of the current checks inspect it.
        selected_modality: Selected modality ('raman' or 'ftir'); must be a
            key of ``MODALITY_RANGES``.

    Returns:
        Tuple of (is_valid, list_of_issues). ``is_valid`` is True only when
        the list of issues is empty.
    """
    issues: list[str] = []

    if selected_modality not in MODALITY_RANGES:
        issues.append(f"Unknown modality: {selected_modality}")
        return False, issues

    # np.min/np.max raise ValueError on empty input and propagate NaN, which
    # would make every comparison below evaluate to False and silently report
    # a broken spectrum as valid. Restrict the checks to finite values.
    x_values = np.asarray(x_data, dtype=float)
    finite_x = x_values[np.isfinite(x_values)]
    if finite_x.size == 0:
        issues.append("No finite wavenumber values found in spectrum data")
        return False, issues

    expected_min, expected_max = MODALITY_RANGES[selected_modality]
    actual_min = float(finite_x.min())
    actual_max = float(finite_x.max())

    # Check wavenumber range
    if actual_min < expected_min * 0.8:  # Allow 20% tolerance
        issues.append(
            f"Minimum wavenumber ({actual_min:.0f} cm⁻¹) is below typical {selected_modality.upper()} range (>{expected_min} cm⁻¹)"
        )

    if actual_max > expected_max * 1.2:  # Allow 20% tolerance
        issues.append(
            f"Maximum wavenumber ({actual_max:.0f} cm⁻¹) is above typical {selected_modality.upper()} range (<{expected_max} cm⁻¹)"
        )

    # Check for reasonable data range coverage
    data_range = actual_max - actual_min
    expected_range = expected_max - expected_min
    if data_range < expected_range * 0.3:  # Should cover at least 30% of expected range
        issues.append(
            f"Data range ({data_range:.0f} cm⁻¹) seems narrow for {selected_modality.upper()} spectroscopy"
        )

    # FTIR-specific checks
    if selected_modality == "ftir":
        # Data starting above 1000 cm⁻¹ misses most of the fingerprint region
        if actual_min > 1000:
            issues.append(
                "FTIR data should typically include fingerprint region (400-1500 cm⁻¹)"
            )

    # Raman-specific checks
    if selected_modality == "raman":
        # Data ending below 1000 cm⁻¹ is unusually short for a Raman spectrum
        if actual_max < 1000:
            issues.append(
                "Raman data typically extends to higher wavenumbers (>1000 cm⁻¹)"
            )

    return not issues, issues
|
177 |
+
|
178 |
+
|
179 |
def preprocess_spectrum(
|
180 |
x: np.ndarray,
|
181 |
y: np.ndarray,
|