Spaces:
Running
Running
devjas1
committed on
Commit
·
9e65713
1
Parent(s):
5353e32
fix(display): Refactor batch results display to improve clarity and metrics presentation
Browse files
- utils/multifile.py +131 -86
utils/multifile.py
CHANGED
@@ -5,6 +5,7 @@ from typing import List, Dict, Any, Tuple, Optional
|
|
5 |
import time
|
6 |
import streamlit as st
|
7 |
import numpy as np
|
|
|
8 |
|
9 |
from .preprocessing import resample_spectrum
|
10 |
from .errors import ErrorHandler, safe_execute
|
@@ -12,7 +13,9 @@ from .results_manager import ResultsManager
|
|
12 |
from .confidence import calculate_softmax_confidence
|
13 |
|
14 |
|
15 |
-
def parse_spectrum_data(
|
|
|
|
|
16 |
"""
|
17 |
Parse spectrum data from text content
|
18 |
|
@@ -27,13 +30,13 @@ def parse_spectrum_data(text_content: str, filename: str = "unknown") -> Tuple[n
|
|
27 |
ValueError: If the data cannot be parsed
|
28 |
"""
|
29 |
try:
|
30 |
-
lines = text_content.strip().split(
|
31 |
|
32 |
# ==Remove empty lines and comments==
|
33 |
data_lines = []
|
34 |
for line in lines:
|
35 |
line = line.strip()
|
36 |
-
if line and not line.startswith(
|
37 |
data_lines.append(line)
|
38 |
|
39 |
if not data_lines:
|
@@ -46,8 +49,14 @@ def parse_spectrum_data(text_content: str, filename: str = "unknown") -> Tuple[n
|
|
46 |
try:
|
47 |
# Handle different separators
|
48 |
parts = line.replace(",", " ").split()
|
49 |
-
numbers = [
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
if len(numbers) >= 2:
|
52 |
x_val = float(numbers[0])
|
53 |
y_val = float(numbers[1])
|
@@ -56,12 +65,14 @@ def parse_spectrum_data(text_content: str, filename: str = "unknown") -> Tuple[n
|
|
56 |
|
57 |
except ValueError:
|
58 |
ErrorHandler.log_warning(
|
59 |
-
f"Could not parse line {i+1}: {line}", f"Parsing {filename}"
|
|
|
60 |
continue
|
61 |
|
62 |
if len(x_vals) < 10: # ==Need minimum points for interpolation==
|
63 |
raise ValueError(
|
64 |
-
f"Insufficient data points ({len(x_vals)}). Need at least 10 points."
|
|
|
65 |
|
66 |
x = np.array(x_vals)
|
67 |
y = np.array(y_vals)
|
@@ -77,7 +88,8 @@ def parse_spectrum_data(text_content: str, filename: str = "unknown") -> Tuple[n
|
|
77 |
# Check reasonable range for Raman spectroscopy
|
78 |
if min(x) < 0 or max(x) > 10000 or (max(x) - min(x)) < 100:
|
79 |
raise ValueError(
|
80 |
-
f"Invalid wavenumber range: {min(x)} - {max(x)}. Expected ~400-4000 cm⁻¹ with span >100"
|
|
|
81 |
|
82 |
return x, y
|
83 |
|
@@ -91,7 +103,7 @@ def process_single_file(
|
|
91 |
model_choice: str,
|
92 |
load_model_func,
|
93 |
run_inference_func,
|
94 |
-
label_file_func
|
95 |
) -> Optional[Dict[str, Any]]:
|
96 |
"""
|
97 |
Process a single spectrum file
|
@@ -116,7 +128,7 @@ def process_single_file(
|
|
116 |
text_content,
|
117 |
filename,
|
118 |
error_context=f"parsing {filename}",
|
119 |
-
show_error=False
|
120 |
)
|
121 |
|
122 |
if not success or result is None:
|
@@ -129,9 +141,9 @@ def process_single_file(
|
|
129 |
resample_spectrum,
|
130 |
x_raw,
|
131 |
y_raw,
|
132 |
-
500,
|
133 |
error_context=f"resampling {filename}",
|
134 |
-
show_error=False
|
135 |
)
|
136 |
|
137 |
if not success or result is None:
|
@@ -145,20 +157,22 @@ def process_single_file(
|
|
145 |
y_resampled,
|
146 |
model_choice,
|
147 |
error_context=f"inference on {filename}",
|
148 |
-
show_error=False
|
149 |
)
|
150 |
|
151 |
if not success or result is None:
|
152 |
ErrorHandler.log_error(
|
153 |
-
Exception("Inference failed"), f"processing {filename}"
|
|
|
154 |
return None
|
155 |
|
156 |
prediction, logits_list, probs, inference_time, logits = result
|
157 |
|
158 |
# ==Calculate confidence==
|
159 |
if logits is not None:
|
160 |
-
probs_np, max_confidence, confidence_level, confidence_emoji =
|
161 |
-
logits)
|
|
|
162 |
else:
|
163 |
probs_np = np.array([])
|
164 |
max_confidence = 0.0
|
@@ -202,7 +216,7 @@ def process_single_file(
|
|
202 |
"filename": filename,
|
203 |
"success": False,
|
204 |
"error": str(e),
|
205 |
-
"processing_time": time.time() - start_time
|
206 |
}
|
207 |
|
208 |
|
@@ -212,7 +226,7 @@ def process_multiple_files(
|
|
212 |
load_model_func,
|
213 |
run_inference_func,
|
214 |
label_file_func,
|
215 |
-
progress_callback=None
|
216 |
) -> List[Dict[str, Any]]:
|
217 |
"""
|
218 |
Process multiple uploaded files
|
@@ -240,8 +254,7 @@ def process_multiple_files(
|
|
240 |
try:
|
241 |
# ==Read file content==
|
242 |
raw = uploaded_file.read()
|
243 |
-
text_content = raw.decode(
|
244 |
-
'utf-8') if isinstance(raw, bytes) else raw
|
245 |
|
246 |
# ==Process the file==
|
247 |
result = process_single_file(
|
@@ -250,7 +263,7 @@ def process_multiple_files(
|
|
250 |
model_choice,
|
251 |
load_model_func,
|
252 |
run_inference_func,
|
253 |
-
label_file_func
|
254 |
)
|
255 |
|
256 |
if result:
|
@@ -269,85 +282,117 @@ def process_multiple_files(
|
|
269 |
processing_time=result["processing_time"],
|
270 |
metadata={
|
271 |
"confidence_level": result["confidence_level"],
|
272 |
-
"confidence_emoji": result["confidence_emoji"]
|
273 |
-
}
|
274 |
)
|
275 |
|
276 |
except Exception as e:
|
277 |
ErrorHandler.log_error(e, f"reading file {uploaded_file.name}")
|
278 |
-
results.append(
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
|
|
|
|
283 |
|
284 |
if progress_callback:
|
285 |
progress_callback(total_files, total_files, "Complete")
|
286 |
|
287 |
ErrorHandler.log_info(
|
288 |
-
f"Completed batch processing: {sum(1 for r in results if r.get('success', False))}/{total_files} successful"
|
|
|
289 |
|
290 |
return results
|
291 |
|
292 |
|
293 |
-
def display_batch_results(
|
294 |
-
"""
|
295 |
-
|
296 |
-
|
297 |
-
Args:
|
298 |
-
results: List of processing results
|
299 |
-
"""
|
300 |
-
if not results:
|
301 |
-
st.warning("No results to display")
|
302 |
return
|
303 |
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
#
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
351 |
|
352 |
|
353 |
def create_batch_uploader() -> List:
|
@@ -362,7 +407,7 @@ def create_batch_uploader() -> List:
|
|
362 |
type="txt",
|
363 |
accept_multiple_files=True,
|
364 |
help="Select multiple .txt files with wavenumber and intensity columns",
|
365 |
-
key="batch_uploader"
|
366 |
)
|
367 |
|
368 |
return uploaded_files if uploaded_files else []
|
|
|
5 |
import time
|
6 |
import streamlit as st
|
7 |
import numpy as np
|
8 |
+
import pandas as pd
|
9 |
|
10 |
from .preprocessing import resample_spectrum
|
11 |
from .errors import ErrorHandler, safe_execute
|
|
|
13 |
from .confidence import calculate_softmax_confidence
|
14 |
|
15 |
|
16 |
+
def parse_spectrum_data(
|
17 |
+
text_content: str, filename: str = "unknown"
|
18 |
+
) -> Tuple[np.ndarray, np.ndarray]:
|
19 |
"""
|
20 |
Parse spectrum data from text content
|
21 |
|
|
|
30 |
ValueError: If the data cannot be parsed
|
31 |
"""
|
32 |
try:
|
33 |
+
lines = text_content.strip().split("\n")
|
34 |
|
35 |
# ==Remove empty lines and comments==
|
36 |
data_lines = []
|
37 |
for line in lines:
|
38 |
line = line.strip()
|
39 |
+
if line and not line.startswith("#") and not line.startswith("%"):
|
40 |
data_lines.append(line)
|
41 |
|
42 |
if not data_lines:
|
|
|
49 |
try:
|
50 |
# Handle different separators
|
51 |
parts = line.replace(",", " ").split()
|
52 |
+
numbers = [
|
53 |
+
p
|
54 |
+
for p in parts
|
55 |
+
if p.replace(".", "", 1)
|
56 |
+
.replace("-", "", 1)
|
57 |
+
.replace("+", "", 1)
|
58 |
+
.isdigit()
|
59 |
+
]
|
60 |
if len(numbers) >= 2:
|
61 |
x_val = float(numbers[0])
|
62 |
y_val = float(numbers[1])
|
|
|
65 |
|
66 |
except ValueError:
|
67 |
ErrorHandler.log_warning(
|
68 |
+
f"Could not parse line {i+1}: {line}", f"Parsing {filename}"
|
69 |
+
)
|
70 |
continue
|
71 |
|
72 |
if len(x_vals) < 10: # ==Need minimum points for interpolation==
|
73 |
raise ValueError(
|
74 |
+
f"Insufficient data points ({len(x_vals)}). Need at least 10 points."
|
75 |
+
)
|
76 |
|
77 |
x = np.array(x_vals)
|
78 |
y = np.array(y_vals)
|
|
|
88 |
# Check reasonable range for Raman spectroscopy
|
89 |
if min(x) < 0 or max(x) > 10000 or (max(x) - min(x)) < 100:
|
90 |
raise ValueError(
|
91 |
+
f"Invalid wavenumber range: {min(x)} - {max(x)}. Expected ~400-4000 cm⁻¹ with span >100"
|
92 |
+
)
|
93 |
|
94 |
return x, y
|
95 |
|
|
|
103 |
model_choice: str,
|
104 |
load_model_func,
|
105 |
run_inference_func,
|
106 |
+
label_file_func,
|
107 |
) -> Optional[Dict[str, Any]]:
|
108 |
"""
|
109 |
Process a single spectrum file
|
|
|
128 |
text_content,
|
129 |
filename,
|
130 |
error_context=f"parsing {filename}",
|
131 |
+
show_error=False,
|
132 |
)
|
133 |
|
134 |
if not success or result is None:
|
|
|
141 |
resample_spectrum,
|
142 |
x_raw,
|
143 |
y_raw,
|
144 |
+
500, # TARGET_LEN
|
145 |
error_context=f"resampling {filename}",
|
146 |
+
show_error=False,
|
147 |
)
|
148 |
|
149 |
if not success or result is None:
|
|
|
157 |
y_resampled,
|
158 |
model_choice,
|
159 |
error_context=f"inference on {filename}",
|
160 |
+
show_error=False,
|
161 |
)
|
162 |
|
163 |
if not success or result is None:
|
164 |
ErrorHandler.log_error(
|
165 |
+
Exception("Inference failed"), f"processing {filename}"
|
166 |
+
)
|
167 |
return None
|
168 |
|
169 |
prediction, logits_list, probs, inference_time, logits = result
|
170 |
|
171 |
# ==Calculate confidence==
|
172 |
if logits is not None:
|
173 |
+
probs_np, max_confidence, confidence_level, confidence_emoji = (
|
174 |
+
calculate_softmax_confidence(logits)
|
175 |
+
)
|
176 |
else:
|
177 |
probs_np = np.array([])
|
178 |
max_confidence = 0.0
|
|
|
216 |
"filename": filename,
|
217 |
"success": False,
|
218 |
"error": str(e),
|
219 |
+
"processing_time": time.time() - start_time,
|
220 |
}
|
221 |
|
222 |
|
|
|
226 |
load_model_func,
|
227 |
run_inference_func,
|
228 |
label_file_func,
|
229 |
+
progress_callback=None,
|
230 |
) -> List[Dict[str, Any]]:
|
231 |
"""
|
232 |
Process multiple uploaded files
|
|
|
254 |
try:
|
255 |
# ==Read file content==
|
256 |
raw = uploaded_file.read()
|
257 |
+
text_content = raw.decode("utf-8") if isinstance(raw, bytes) else raw
|
|
|
258 |
|
259 |
# ==Process the file==
|
260 |
result = process_single_file(
|
|
|
263 |
model_choice,
|
264 |
load_model_func,
|
265 |
run_inference_func,
|
266 |
+
label_file_func,
|
267 |
)
|
268 |
|
269 |
if result:
|
|
|
282 |
processing_time=result["processing_time"],
|
283 |
metadata={
|
284 |
"confidence_level": result["confidence_level"],
|
285 |
+
"confidence_emoji": result["confidence_emoji"],
|
286 |
+
},
|
287 |
)
|
288 |
|
289 |
except Exception as e:
|
290 |
ErrorHandler.log_error(e, f"reading file {uploaded_file.name}")
|
291 |
+
results.append(
|
292 |
+
{
|
293 |
+
"filename": uploaded_file.name,
|
294 |
+
"success": False,
|
295 |
+
"error": f"Failed to read file: {str(e)}",
|
296 |
+
}
|
297 |
+
)
|
298 |
|
299 |
if progress_callback:
|
300 |
progress_callback(total_files, total_files, "Complete")
|
301 |
|
302 |
ErrorHandler.log_info(
|
303 |
+
f"Completed batch processing: {sum(1 for r in results if r.get('success', False))}/{total_files} successful"
|
304 |
+
)
|
305 |
|
306 |
return results
|
307 |
|
308 |
|
309 |
+
def display_batch_results(batch_results: list):
|
310 |
+
"""Renders a clean, consolidated summary of batch processing results using metrics and a pandas DataFrame replacing the old expander list"""
|
311 |
+
if not batch_results:
|
312 |
+
st.info("No batch results to display.")
|
|
|
|
|
|
|
|
|
|
|
313 |
return
|
314 |
|
315 |
+
successful_runs = [r for r in batch_results if r.get("success", False)]
|
316 |
+
failed_runs = [r for r in batch_results if not r.get("success", False)]
|
317 |
+
|
318 |
+
# 1. High Level Metrics
|
319 |
+
st.markdown("###### Batch Summary")
|
320 |
+
metric_cols = st.columns(3)
|
321 |
+
metric_cols[0].metric("Total Files Processed", f"{len(batch_results)}")
|
322 |
+
metric_cols[1].metric("✔️ Successful", f"{len(successful_runs)}")
|
323 |
+
metric_cols[2].metric("❌ Failed", f"{len(failed_runs)}")
|
324 |
+
|
325 |
+
# 2. Failure details (collapsed by default)
|
326 |
+
if failed_runs:
|
327 |
+
with st.expander(
|
328 |
+
f"View details for {len(failed_runs)} failed file(s)", expanded=False
|
329 |
+
):
|
330 |
+
for r in failed_runs:
|
331 |
+
st.error(f"**File:** `{r.get('filename', 'unknown')}`")
|
332 |
+
st.caption(
|
333 |
+
f"Reason for failure: {r.get('error', 'No details provided')}"
|
334 |
+
)
|
335 |
+
|
336 |
+
|
337 |
+
# Legacy display batch results
|
338 |
+
# def display_batch_results(results: List[Dict[str, Any]]) -> None:
|
339 |
+
# """
|
340 |
+
# Display batch processing results in the UI
|
341 |
+
|
342 |
+
# Args:
|
343 |
+
# results: List of processing results
|
344 |
+
# """
|
345 |
+
# if not results:
|
346 |
+
# st.warning("No results to display")
|
347 |
+
# return
|
348 |
+
|
349 |
+
# successful = [r for r in results if r.get("success", False)]
|
350 |
+
# failed = [r for r in results if not r.get("success", False)]
|
351 |
+
|
352 |
+
# # ==Summary==
|
353 |
+
# col1, col2, col3 = st.columns(3, border=True)
|
354 |
+
# with col1:
|
355 |
+
# st.metric("Total Files", len(results))
|
356 |
+
# with col2:
|
357 |
+
# st.metric("Successful", len(successful),
|
358 |
+
# delta=f"{len(successful)/len(results)*100:.1f}%")
|
359 |
+
# with col3:
|
360 |
+
# st.metric("Failed", len(
|
361 |
+
# failed), delta=f"-{len(failed)/len(results)*100:.1f}%" if failed else "0%")
|
362 |
+
|
363 |
+
# # ==Results tabs==
|
364 |
+
# tab1, tab2 = st.tabs(["✅Successful", "❌ Failed"], width="stretch")
|
365 |
+
|
366 |
+
# with tab1:
|
367 |
+
# with st.expander("Successful"):
|
368 |
+
# if successful:
|
369 |
+
# for result in successful:
|
370 |
+
# with st.expander(f"{result['filename']}", expanded=False):
|
371 |
+
# col1, col2 = st.columns(2)
|
372 |
+
# with col1:
|
373 |
+
# st.write(
|
374 |
+
# f"**Prediction:** {result['predicted_class']}")
|
375 |
+
# st.write(
|
376 |
+
# f"**Confidence:** {result['confidence_emoji']} {result['confidence_level']} ({result['confidence']:.3f})")
|
377 |
+
# with col2:
|
378 |
+
# st.write(
|
379 |
+
# f"**Processing Time:** {result['processing_time']:.3f}s")
|
380 |
+
# if result['ground_truth'] is not None:
|
381 |
+
# gt_label = {0: "Stable", 1: "Weathered"}.get(
|
382 |
+
# result['ground_truth'], "Unknown")
|
383 |
+
# correct = "✅" if result['prediction'] == result['ground_truth'] else "❌"
|
384 |
+
# st.write(
|
385 |
+
# f"**Ground Truth:** {gt_label} {correct}")
|
386 |
+
# else:
|
387 |
+
# st.info("No successful results")
|
388 |
+
|
389 |
+
# with tab2:
|
390 |
+
# if failed:
|
391 |
+
# for result in failed:
|
392 |
+
# with st.expander(f"❌ {result['filename']}", expanded=False):
|
393 |
+
# st.error(f"Error: {result.get('error', 'Unknown error')}")
|
394 |
+
# else:
|
395 |
+
# st.success("No failed files!")
|
396 |
|
397 |
|
398 |
def create_batch_uploader() -> List:
|
|
|
407 |
type="txt",
|
408 |
accept_multiple_files=True,
|
409 |
help="Select multiple .txt files with wavenumber and intensity columns",
|
410 |
+
key="batch_uploader",
|
411 |
)
|
412 |
|
413 |
return uploaded_files if uploaded_files else []
|