devjas1 committed
Commit 9e65713 · 1 Parent(s): 5353e32

fix(display): Refactor batch results display to improve clarity and metrics presentation

Files changed (1)
  1. utils/multifile.py +131 -86
utils/multifile.py CHANGED
@@ -5,6 +5,7 @@ from typing import List, Dict, Any, Tuple, Optional
 import time
 import streamlit as st
 import numpy as np
+import pandas as pd
 
 from .preprocessing import resample_spectrum
 from .errors import ErrorHandler, safe_execute
@@ -12,7 +13,9 @@ from .results_manager import ResultsManager
 from .confidence import calculate_softmax_confidence
 
 
-def parse_spectrum_data(text_content: str, filename: str = "unknown") -> Tuple[np.ndarray, np.ndarray]:
+def parse_spectrum_data(
+    text_content: str, filename: str = "unknown"
+) -> Tuple[np.ndarray, np.ndarray]:
     """
     Parse spectrum data from text content
 
@@ -27,13 +30,13 @@ def parse_spectrum_data(text_content: str, filename: str = "unknown") -> Tuple[n
         ValueError: If the data cannot be parsed
     """
     try:
-        lines = text_content.strip().split('\n')
+        lines = text_content.strip().split("\n")
 
         # ==Remove empty lines and comments==
         data_lines = []
         for line in lines:
            line = line.strip()
-            if line and not line.startswith('#') and not line.startswith('%'):
+            if line and not line.startswith("#") and not line.startswith("%"):
                 data_lines.append(line)
 
         if not data_lines:
@@ -46,8 +49,14 @@ def parse_spectrum_data(text_content: str, filename: str = "unknown") -> Tuple[n
             try:
                 # Handle different separators
                 parts = line.replace(",", " ").split()
-                numbers = [p for p in parts if p.replace('.', '', 1).replace(
-                    '-', '', 1).replace('+', '', 1).isdigit()]
+                numbers = [
+                    p
+                    for p in parts
+                    if p.replace(".", "", 1)
+                    .replace("-", "", 1)
+                    .replace("+", "", 1)
+                    .isdigit()
+                ]
                 if len(numbers) >= 2:
                     x_val = float(numbers[0])
                     y_val = float(numbers[1])
@@ -56,12 +65,14 @@ def parse_spectrum_data(text_content: str, filename: str = "unknown") -> Tuple[n
 
             except ValueError:
                 ErrorHandler.log_warning(
-                    f"Could not parse line {i+1}: {line}", f"Parsing {filename}")
+                    f"Could not parse line {i+1}: {line}", f"Parsing {filename}"
+                )
                 continue
 
         if len(x_vals) < 10:  # ==Need minimum points for interpolation==
             raise ValueError(
-                f"Insufficient data points ({len(x_vals)}). Need at least 10 points.")
+                f"Insufficient data points ({len(x_vals)}). Need at least 10 points."
+            )
 
         x = np.array(x_vals)
         y = np.array(y_vals)
@@ -77,7 +88,8 @@ def parse_spectrum_data(text_content: str, filename: str = "unknown") -> Tuple[n
         # Check reasonable range for Raman spectroscopy
         if min(x) < 0 or max(x) > 10000 or (max(x) - min(x)) < 100:
             raise ValueError(
-                f"Invalid wavenumber range: {min(x)} - {max(x)}. Expected ~400-4000 cm⁻¹ with span >100")
+                f"Invalid wavenumber range: {min(x)} - {max(x)}. Expected ~400-4000 cm⁻¹ with span >100"
+            )
 
         return x, y
 
@@ -91,7 +103,7 @@ def process_single_file(
     model_choice: str,
     load_model_func,
     run_inference_func,
-    label_file_func
+    label_file_func,
 ) -> Optional[Dict[str, Any]]:
     """
     Process a single spectrum file
@@ -116,7 +128,7 @@ def process_single_file(
         text_content,
         filename,
         error_context=f"parsing {filename}",
-        show_error=False
+        show_error=False,
     )
 
     if not success or result is None:
@@ -129,9 +141,9 @@ def process_single_file(
         resample_spectrum,
         x_raw,
         y_raw,
-        500, # TARGET_LEN
+        500,  # TARGET_LEN
         error_context=f"resampling {filename}",
-        show_error=False
+        show_error=False,
     )
 
     if not success or result is None:
@@ -145,20 +157,22 @@ def process_single_file(
         y_resampled,
         model_choice,
         error_context=f"inference on {filename}",
-        show_error=False
+        show_error=False,
     )
 
     if not success or result is None:
         ErrorHandler.log_error(
-            Exception("Inference failed"), f"processing {filename}")
+            Exception("Inference failed"), f"processing {filename}"
+        )
         return None
 
     prediction, logits_list, probs, inference_time, logits = result
 
     # ==Calculate confidence==
     if logits is not None:
-        probs_np, max_confidence, confidence_level, confidence_emoji = calculate_softmax_confidence(
-            logits)
+        probs_np, max_confidence, confidence_level, confidence_emoji = (
+            calculate_softmax_confidence(logits)
+        )
     else:
         probs_np = np.array([])
         max_confidence = 0.0
@@ -202,7 +216,7 @@ def process_single_file(
             "filename": filename,
             "success": False,
             "error": str(e),
-            "processing_time": time.time() - start_time
+            "processing_time": time.time() - start_time,
         }
 
 
@@ -212,7 +226,7 @@ def process_multiple_files(
     load_model_func,
     run_inference_func,
     label_file_func,
-    progress_callback=None
+    progress_callback=None,
 ) -> List[Dict[str, Any]]:
     """
     Process multiple uploaded files
@@ -240,8 +254,7 @@ def process_multiple_files(
         try:
             # ==Read file content==
             raw = uploaded_file.read()
-            text_content = raw.decode(
-                'utf-8') if isinstance(raw, bytes) else raw
+            text_content = raw.decode("utf-8") if isinstance(raw, bytes) else raw
 
             # ==Process the file==
             result = process_single_file(
@@ -250,7 +263,7 @@ def process_multiple_files(
                 model_choice,
                 load_model_func,
                 run_inference_func,
-                label_file_func
+                label_file_func,
             )
 
             if result:
@@ -269,85 +282,117 @@ def process_multiple_files(
                     processing_time=result["processing_time"],
                     metadata={
                         "confidence_level": result["confidence_level"],
-                        "confidence_emoji": result["confidence_emoji"]
-                    }
+                        "confidence_emoji": result["confidence_emoji"],
+                    },
                 )
 
         except Exception as e:
             ErrorHandler.log_error(e, f"reading file {uploaded_file.name}")
-            results.append({
-                "filename": uploaded_file.name,
-                "success": False,
-                "error": f"Failed to read file: {str(e)}"
-            })
+            results.append(
+                {
+                    "filename": uploaded_file.name,
+                    "success": False,
+                    "error": f"Failed to read file: {str(e)}",
+                }
+            )
 
     if progress_callback:
         progress_callback(total_files, total_files, "Complete")
 
     ErrorHandler.log_info(
-        f"Completed batch processing: {sum(1 for r in results if r.get('success', False))}/{total_files} successful")
+        f"Completed batch processing: {sum(1 for r in results if r.get('success', False))}/{total_files} successful"
+    )
 
     return results
 
 
-def display_batch_results(results: List[Dict[str, Any]]) -> None:
-    """
-    Display batch processing results in the UI
-
-    Args:
-        results: List of processing results
-    """
-    if not results:
-        st.warning("No results to display")
+def display_batch_results(batch_results: list):
+    """Renders a clean, consolidated summary of batch processing results using metrics and a pandas DataFrame, replacing the old expander list."""
+    if not batch_results:
+        st.info("No batch results to display.")
         return
 
-    successful = [r for r in results if r.get("success", False)]
-    failed = [r for r in results if not r.get("success", False)]
-
-    # ==Summary==
-    col1, col2, col3 = st.columns(3, border=True)
-    with col1:
-        st.metric("Total Files", len(results))
-    with col2:
-        st.metric("Successful", len(successful),
-                  delta=f"{len(successful)/len(results)*100:.1f}%")
-    with col3:
-        st.metric("Failed", len(
-            failed), delta=f"-{len(failed)/len(results)*100:.1f}%" if failed else "0%")
-
-    # ==Results tabs==
-    tab1, tab2 = st.tabs(["✅Successful", "❌ Failed"], width="stretch")
-
-    with tab1:
-        with st.expander("Successful"):
-            if successful:
-                for result in successful:
-                    with st.expander(f"{result['filename']}", expanded=False):
-                        col1, col2 = st.columns(2)
-                        with col1:
-                            st.write(
-                                f"**Prediction:** {result['predicted_class']}")
-                            st.write(
-                                f"**Confidence:** {result['confidence_emoji']} {result['confidence_level']} ({result['confidence']:.3f})")
-                        with col2:
-                            st.write(
-                                f"**Processing Time:** {result['processing_time']:.3f}s")
-                            if result['ground_truth'] is not None:
-                                gt_label = {0: "Stable", 1: "Weathered"}.get(
-                                    result['ground_truth'], "Unknown")
-                                correct = "✅" if result['prediction'] == result['ground_truth'] else "❌"
-                                st.write(
-                                    f"**Ground Truth:** {gt_label} {correct}")
-            else:
-                st.info("No successful results")
-
-    with tab2:
-        if failed:
-            for result in failed:
-                with st.expander(f"❌ {result['filename']}", expanded=False):
-                    st.error(f"Error: {result.get('error', 'Unknown error')}")
-        else:
-            st.success("No failed files!")
+    successful_runs = [r for r in batch_results if r.get("success", False)]
+    failed_runs = [r for r in batch_results if not r.get("success", False)]
+
+    # 1. High-level metrics
+    st.markdown("###### Batch Summary")
+    metric_cols = st.columns(3)
+    metric_cols[0].metric("Total Files Processed", f"{len(batch_results)}")
+    metric_cols[1].metric("✔️ Successful", f"{len(successful_runs)}")
+    metric_cols[2].metric("❌ Failed", f"{len(failed_runs)}")
+
+    # 3. Hidden failure details
+    if failed_runs:
+        with st.expander(
+            f"View details for {len(failed_runs)} failed file(s)", expanded=False
+        ):
+            for r in failed_runs:
+                st.error(f"**File:** `{r.get('filename', 'unknown')}`")
+                st.caption(
+                    f"Reason for failure: {r.get('error', 'No details provided')}"
+                )
+
+
+# Legacy display batch results
+# def display_batch_results(results: List[Dict[str, Any]]) -> None:
+#     """
+#     Display batch processing results in the UI
+
+#     Args:
+#         results: List of processing results
+#     """
+#     if not results:
+#         st.warning("No results to display")
+#         return
+
+#     successful = [r for r in results if r.get("success", False)]
+#     failed = [r for r in results if not r.get("success", False)]
+
+#     # ==Summary==
+#     col1, col2, col3 = st.columns(3, border=True)
+#     with col1:
+#         st.metric("Total Files", len(results))
+#     with col2:
+#         st.metric("Successful", len(successful),
+#                   delta=f"{len(successful)/len(results)*100:.1f}%")
+#     with col3:
+#         st.metric("Failed", len(
+#             failed), delta=f"-{len(failed)/len(results)*100:.1f}%" if failed else "0%")
+
+#     # ==Results tabs==
+#     tab1, tab2 = st.tabs(["✅Successful", "❌ Failed"], width="stretch")
+
+#     with tab1:
+#         with st.expander("Successful"):
+#             if successful:
+#                 for result in successful:
+#                     with st.expander(f"{result['filename']}", expanded=False):
+#                         col1, col2 = st.columns(2)
+#                         with col1:
+#                             st.write(
+#                                 f"**Prediction:** {result['predicted_class']}")
+#                             st.write(
+#                                 f"**Confidence:** {result['confidence_emoji']} {result['confidence_level']} ({result['confidence']:.3f})")
+#                         with col2:
+#                             st.write(
+#                                 f"**Processing Time:** {result['processing_time']:.3f}s")
+#                             if result['ground_truth'] is not None:
+#                                 gt_label = {0: "Stable", 1: "Weathered"}.get(
+#                                     result['ground_truth'], "Unknown")
+#                                 correct = "✅" if result['prediction'] == result['ground_truth'] else "❌"
+#                                 st.write(
+#                                     f"**Ground Truth:** {gt_label} {correct}")
+#             else:
+#                 st.info("No successful results")
+
+#     with tab2:
+#         if failed:
+#             for result in failed:
+#                 with st.expander(f"❌ {result['filename']}", expanded=False):
+#                     st.error(f"Error: {result.get('error', 'Unknown error')}")
+#         else:
+#             st.success("No failed files!")
 
 
 def create_batch_uploader() -> List:
@@ -362,7 +407,7 @@ def create_batch_uploader() -> List:
         type="txt",
         accept_multiple_files=True,
         help="Select multiple .txt files with wavenumber and intensity columns",
-        key="batch_uploader"
+        key="batch_uploader",
     )
 
    return uploaded_files if uploaded_files else []
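
For context, a minimal sketch (not part of this commit) of the input format `parse_spectrum_data` tolerates: `#`/`%` comment lines, comma or whitespace separators, and the ≥10-point / >100 cm⁻¹-span checks. The synthetic spectrum and the `demo.txt` name are made up for illustration, and the import assumes `utils` is importable as a package from the repo root.

```python
from utils.multifile import parse_spectrum_data  # assumes repo root on sys.path

# Synthetic 50-point spectrum: '#' comment header, comma-separated columns,
# wavenumbers 400-890 cm⁻¹ (span 490 > 100, inside the 0-10000 sanity check).
raw = "\n".join(
    ["# wavenumber, intensity"]
    + [f"{400 + 10 * i}, {1000 + 5 * i}" for i in range(50)]
)

x, y = parse_spectrum_data(raw, filename="demo.txt")
print(x.shape, y.shape)  # -> (50,) (50,)
```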
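And a hedged sketch of how the batch pieces could be wired together in a Streamlit page after this refactor. The stub functions and the `"model_choice_key"` value are hypothetical stand-ins; `utils/multifile.py` only receives these callables as parameters, so the real app's names and signatures may differ.

```python
import streamlit as st

from utils.multifile import (
    create_batch_uploader,
    display_batch_results,
    process_multiple_files,
)


def load_model(model_choice):  # hypothetical stub for the app's model loader
    return None


def run_inference(y_resampled, model_choice):
    # Hypothetical stub; signature assumed. Returns the (prediction,
    # logits_list, probs, inference_time, logits) tuple that
    # process_single_file unpacks.
    return 0, [], [], 0.0, None


def label_file(filename):  # hypothetical stub for ground-truth labeling
    return None


uploaded = create_batch_uploader()  # st.file_uploader(..., accept_multiple_files=True)

if uploaded and st.button("Run batch inference"):
    progress = st.progress(0.0)

    def on_progress(done, total, message):
        # Matches the (current, total, message) shape used by process_multiple_files
        progress.progress(done / total, text=message)

    results = process_multiple_files(
        uploaded,
        "model_choice_key",  # assumed model_choice argument; app-specific
        load_model,
        run_inference,
        label_file,
        progress_callback=on_progress,
    )
    display_batch_results(results)  # metrics summary + expandable failure details
```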