oliver-aizip committed on
Commit
347797e
·
1 Parent(s): e7fd2d9

update data pipeline

Browse files
Files changed (4) hide show
  1. app.py +61 -32
  2. utils/arena_df.csv +0 -0
  3. utils/context_processor.py +57 -467
  4. utils/data_loader.py +91 -80
app.py CHANGED
@@ -37,6 +37,7 @@ def load_context(set_interrupt=False):
37
  generation_interrupt.clear()
38
  example = get_random_example()
39
 
 
40
  context_desc = example.get('processed_context_desc', '')
41
  if context_desc:
42
  context_desc = f"<div class='context-topic'><span class='topic-label'>The question and context are about:</span> {context_desc}</div>"
@@ -75,14 +76,17 @@ def generate_model_summaries(example):
75
 
76
  try:
77
  m_a_name, m_b_name = random.sample(model_names, 2)
 
 
 
 
 
78
  s_a, s_b = generate_summaries(example, m_a_name, m_b_name)
79
 
80
  if not generation_interrupt.is_set():
81
- result["model_a"] = m_a_name
82
- result["model_b"] = m_b_name
83
  result["summary_a"] = s_a
84
  result["summary_b"] = s_b
85
- result["completed"] = True
86
  except Exception as e:
87
  print(f"Error in generation: {e}")
88
 
@@ -90,16 +94,20 @@ def generate_model_summaries(example):
90
 
91
  def process_generation_result(result):
92
  """Process the results from the generation function"""
93
- if not result["completed"]:
94
- # Generation was interrupted or failed
95
  return [
96
- "", "", "", "", None, [], False, load_leaderboard_data(),
97
- gr.update(value="Generation was interrupted or failed. Please try again."),
98
- gr.update(value="Generation was interrupted or failed. Please try again."),
99
- gr.update(interactive=True, elem_classes=["vote-button"]),
100
- gr.update(interactive=True, elem_classes=["vote-button"]),
101
- gr.update(interactive=True, elem_classes=["vote-button"]),
102
- gr.update(interactive=True, elem_classes=["vote-button", "vote-button-neither"]),
 
 
 
 
103
  gr.update(choices=[], value=[], interactive=False, visible=False),
104
  gr.update(visible=False),
105
  gr.update(interactive=False, visible=True),
@@ -108,6 +116,9 @@ def process_generation_result(result):
108
  gr.update(elem_classes=[])
109
  ]
110
 
 
 
 
111
  # Generation completed successfully
112
  agg_results = load_leaderboard_data()
113
  return [
@@ -116,10 +127,10 @@ def process_generation_result(result):
116
  None, [], False, agg_results,
117
  gr.update(value=result["summary_a"]),
118
  gr.update(value=result["summary_b"]),
119
- gr.update(interactive=True, elem_classes=["vote-button"]),
120
- gr.update(interactive=True, elem_classes=["vote-button"]),
121
- gr.update(interactive=True, elem_classes=["vote-button"]),
122
- gr.update(interactive=True, elem_classes=["vote-button", "vote-button-neither"]),
123
  gr.update(choices=[], value=[], interactive=False, visible=False),
124
  gr.update(visible=False),
125
  gr.update(interactive=False, visible=True),
@@ -178,10 +189,10 @@ def show_loading_state():
178
  return [
179
  gr.update(value="Loading new question and summaries...", interactive=False),
180
  gr.update(value="Loading new question and summaries...", interactive=False),
181
- gr.update(interactive=False),
182
- gr.update(interactive=False),
183
- gr.update(interactive=False),
184
- gr.update(interactive=False)
185
  ]
186
 
187
  def handle_new_example_click():
@@ -191,9 +202,14 @@ def handle_new_example_click():
191
 
192
  def update_ui_for_new_context(example):
193
  """Update UI with new context information"""
 
 
 
 
 
194
  return [
195
  gr.update(value=example['question']),
196
- gr.update(value=example.get('processed_context_desc', ''), visible=bool(example.get('processed_context_desc', ''))),
197
  gr.update(value=get_context_html(example, False)),
198
  gr.update(value="Show Full Context", elem_classes=["context-toggle-button"]),
199
  False
@@ -204,6 +220,8 @@ def cleanup_on_disconnect():
204
  """Clean up resources when browser disconnects"""
205
  print(f"Browser disconnected. Cleaning up resources...")
206
  generation_interrupt.set()
 
 
207
 
208
  # Create Gradio interface
209
  with gr.Blocks(theme=gr.themes.Default(
@@ -213,11 +231,11 @@ with gr.Blocks(theme=gr.themes.Default(
213
  # Load CSS
214
  css_path = os.path.join(os.getcwd(), 'static', 'styles.css')
215
 
216
- # Load the files
217
  with open(css_path, 'r') as f:
218
  css_content = f.read()
219
 
220
- # Create HTML components with CSS and JavaScript links
221
  gr.HTML(f"<style>{css_content}</style>")
222
 
223
  # Add JavaScript to handle browser unload events
@@ -309,10 +327,10 @@ with gr.Blocks(theme=gr.themes.Default(
309
  # Voting section
310
  gr.Markdown("### 🏅 Cast Your Vote", elem_classes="section-heading")
311
  with gr.Row():
312
- vote_button_a = gr.Button("⬅️ Summary A is Better", elem_classes=["vote-button"])
313
- vote_button_tie = gr.Button("🤝 Tie / Equally Good", elem_classes=["vote-button"])
314
- vote_button_b = gr.Button("➡️ Summary B is Better", elem_classes=["vote-button"])
315
- vote_button_neither = gr.Button("❌ Neither is Good", elem_classes=["vote-button", "vote-button-neither"])
316
 
317
  # Feedback and Submit sections
318
  with gr.Group(elem_classes=["feedback-section"], visible=False) as feedback_section:
@@ -388,15 +406,17 @@ The Elo rating system provides a more accurate ranking than simple win rates:
388
  outputs=[results_table_display]
389
  )
390
 
391
- # Random Question and Try Another buttons with interruption
 
392
  for btn in [random_question_btn, try_another_btn]:
393
- btn.click(
394
- fn=show_loading_state, # First show loading state
 
395
  inputs=[],
396
  outputs=[summary_a_display, summary_b_display, vote_button_a,
397
  vote_button_b, vote_button_tie, vote_button_neither]
398
  ).then(
399
- fn=handle_new_example_click, # Now uses the centralized approach
400
  inputs=[],
401
  outputs=[current_example]
402
  ).then(
@@ -404,9 +424,18 @@ The Elo rating system provides a more accurate ranking than simple win rates:
404
  inputs=[current_example],
405
  outputs=[query_display, context_description, context_display,
406
  context_toggle_btn, show_full_context]
 
 
 
 
 
 
 
 
 
407
  ).then(
408
  fn=process_example,
409
- inputs=[current_example],
410
  outputs=[model_a_name, model_b_name, summary_a_text, summary_b_text,
411
  selected_winner, feedback_list, show_results_state, results_agg,
412
  summary_a_display, summary_b_display, vote_button_a, vote_button_b,
 
37
  generation_interrupt.clear()
38
  example = get_random_example()
39
 
40
+ # Format the context description
41
  context_desc = example.get('processed_context_desc', '')
42
  if context_desc:
43
  context_desc = f"<div class='context-topic'><span class='topic-label'>The question and context are about:</span> {context_desc}</div>"
 
76
 
77
  try:
78
  m_a_name, m_b_name = random.sample(model_names, 2)
79
+
80
+ # Track the partial completion state
81
+ result["model_a"] = m_a_name
82
+ result["model_b"] = m_b_name
83
+
84
  s_a, s_b = generate_summaries(example, m_a_name, m_b_name)
85
 
86
  if not generation_interrupt.is_set():
 
 
87
  result["summary_a"] = s_a
88
  result["summary_b"] = s_b
89
+ result["completed"] = bool(s_a and s_b) # Only mark complete if both have content
90
  except Exception as e:
91
  print(f"Error in generation: {e}")
92
 
 
94
 
95
  def process_generation_result(result):
96
  """Process the results from the generation function"""
97
+ if not result["completed"] or not result["summary_a"] or not result["summary_b"]:
98
+ # Either generation was interrupted or both summaries aren't ready
99
  return [
100
+ result.get("model_a", ""),
101
+ result.get("model_b", ""),
102
+ result.get("summary_a", ""),
103
+ result.get("summary_b", ""),
104
+ None, [], False, load_leaderboard_data(),
105
+ gr.update(value=result.get("summary_a", "Generation was interrupted or failed.")),
106
+ gr.update(value=result.get("summary_b", "Generation was interrupted or failed.")),
107
+ gr.update(interactive=False, elem_classes=["vote-button"]), # Explicitly disable
108
+ gr.update(interactive=False, elem_classes=["vote-button"]),
109
+ gr.update(interactive=False, elem_classes=["vote-button"]),
110
+ gr.update(interactive=False, elem_classes=["vote-button", "vote-button-neither"]),
111
  gr.update(choices=[], value=[], interactive=False, visible=False),
112
  gr.update(visible=False),
113
  gr.update(interactive=False, visible=True),
 
116
  gr.update(elem_classes=[])
117
  ]
118
 
119
+ # Only enable voting when both summaries are complete and non-empty
120
+ buttons_interactive = bool(result["summary_a"] and result["summary_b"])
121
+
122
  # Generation completed successfully
123
  agg_results = load_leaderboard_data()
124
  return [
 
127
  None, [], False, agg_results,
128
  gr.update(value=result["summary_a"]),
129
  gr.update(value=result["summary_b"]),
130
+ gr.update(interactive=buttons_interactive, elem_classes=["vote-button"]),
131
+ gr.update(interactive=buttons_interactive, elem_classes=["vote-button"]),
132
+ gr.update(interactive=buttons_interactive, elem_classes=["vote-button"]),
133
+ gr.update(interactive=buttons_interactive, elem_classes=["vote-button", "vote-button-neither"]),
134
  gr.update(choices=[], value=[], interactive=False, visible=False),
135
  gr.update(visible=False),
136
  gr.update(interactive=False, visible=True),
 
189
  return [
190
  gr.update(value="Loading new question and summaries...", interactive=False),
191
  gr.update(value="Loading new question and summaries...", interactive=False),
192
+ gr.update(interactive=False), # For vote_button_a
193
+ gr.update(interactive=False), # For vote_button_b
194
+ gr.update(interactive=False), # For vote_button_tie
195
+ gr.update(interactive=False) # For vote_button_neither
196
  ]
197
 
198
  def handle_new_example_click():
 
202
 
203
  def update_ui_for_new_context(example):
204
  """Update UI with new context information"""
205
+ # Format the context description
206
+ context_desc = example.get('processed_context_desc', '')
207
+ if context_desc:
208
+ context_desc = f"<div class='context-topic'><span class='topic-label'>The question and context are about:</span> {context_desc}</div>"
209
+
210
  return [
211
  gr.update(value=example['question']),
212
+ gr.update(value=context_desc, visible=bool(context_desc)),
213
  gr.update(value=get_context_html(example, False)),
214
  gr.update(value="Show Full Context", elem_classes=["context-toggle-button"]),
215
  False
 
220
  """Clean up resources when browser disconnects"""
221
  print(f"Browser disconnected. Cleaning up resources...")
222
  generation_interrupt.set()
223
+ # No need for time.sleep here as this is just setting the flag
224
+ # Threads will detect it on their next check
225
 
226
  # Create Gradio interface
227
  with gr.Blocks(theme=gr.themes.Default(
 
231
  # Load CSS
232
  css_path = os.path.join(os.getcwd(), 'static', 'styles.css')
233
 
234
+ # Load the CSS file
235
  with open(css_path, 'r') as f:
236
  css_content = f.read()
237
 
238
+ # Create HTML components with CSS
239
  gr.HTML(f"<style>{css_content}</style>")
240
 
241
  # Add JavaScript to handle browser unload events
 
327
  # Voting section
328
  gr.Markdown("### 🏅 Cast Your Vote", elem_classes="section-heading")
329
  with gr.Row():
330
+ vote_button_a = gr.Button("⬅️ Summary A is Better", elem_classes=["vote-button"], interactive=False)
331
+ vote_button_tie = gr.Button("🤝 Tie / Equally Good", elem_classes=["vote-button"], interactive=False)
332
+ vote_button_b = gr.Button("➡️ Summary B is Better", elem_classes=["vote-button"], interactive=False)
333
+ vote_button_neither = gr.Button("❌ Neither is Good", elem_classes=["vote-button", "vote-button-neither"], interactive=False)
334
 
335
  # Feedback and Submit sections
336
  with gr.Group(elem_classes=["feedback-section"], visible=False) as feedback_section:
 
406
  outputs=[results_table_display]
407
  )
408
 
409
+ # Alternative approach: use two separate clicks for each button
410
+ # First click event: Update UI immediately
411
  for btn in [random_question_btn, try_another_btn]:
412
+ # Handle UI updates first
413
+ event1 = btn.click(
414
+ fn=show_loading_state,
415
  inputs=[],
416
  outputs=[summary_a_display, summary_b_display, vote_button_a,
417
  vote_button_b, vote_button_tie, vote_button_neither]
418
  ).then(
419
+ fn=handle_new_example_click,
420
  inputs=[],
421
  outputs=[current_example]
422
  ).then(
 
424
  inputs=[current_example],
425
  outputs=[query_display, context_description, context_display,
426
  context_toggle_btn, show_full_context]
427
+ )
428
+
429
+ # Second click event for each button runs in parallel with the first
430
+ for btn in [random_question_btn, try_another_btn]:
431
+ # Generate model outputs (potentially slower operation)
432
+ event2 = btn.click(
433
+ fn=handle_new_example_click, # This will be called separately from the first event
434
+ inputs=[],
435
+ outputs=[current_example]
436
  ).then(
437
  fn=process_example,
438
+ inputs=[current_example],
439
  outputs=[model_a_name, model_b_name, summary_a_text, summary_b_text,
440
  selected_winner, feedback_list, show_results_state, results_agg,
441
  summary_a_display, summary_b_display, vote_button_a, vote_button_b,
utils/arena_df.csv CHANGED
The diff for this file is too large to render. See raw diff
 
utils/context_processor.py CHANGED
@@ -1,460 +1,51 @@
1
  import re
2
  import html
3
  import json
4
- from typing import Dict, List, Tuple, Optional, Any, Union
5
 
6
- class ContextProcessor:
7
- """Processes highlighted contexts for the RAG Summarizer Arena"""
8
-
9
- # Common HTML entities that might be incomplete
10
- INCOMPLETE_ENTITIES = {
11
- '&#x27': '&#x27;',
12
- '&quot': '&quot;',
13
- '&lt': '&lt;',
14
- '&gt': '&gt;',
15
- '&amp': '&amp;'
16
- }
17
-
18
- @staticmethod
19
- def clean_text(text: str) -> str:
20
- """Cleans text by fixing HTML entities and handling escaped characters"""
21
- if not text or not isinstance(text, str):
22
- return text
23
-
24
- # Fix incomplete HTML entities
25
- for incomplete, complete in ContextProcessor.INCOMPLETE_ENTITIES.items():
26
- text = re.sub(f"{re.escape(incomplete)}(?!;)", complete, text)
27
-
28
- # Convert HTML entities to characters
29
- try:
30
- text = html.unescape(text)
31
- except Exception:
32
- pass
33
-
34
- # Handle escaped quotes and special characters
35
- replacements = {
36
- r'\"': '"', r"\'": "'", r"\n": "\n", r"\t": "\t", r"\\": "\\",
37
- '“': '"', '”': '"', '‘': "'", '’': "'", '`': "'", '´': "'"
38
- }
39
- for pattern, replacement in replacements.items():
40
- text = text.replace(pattern, replacement)
41
-
42
- # Remove trailing backslash if present
43
- if text.rstrip().endswith('\\'):
44
- text = text.rstrip().rstrip('\\')
45
-
46
- return text
47
-
48
- @staticmethod
49
- def balance_highlight_tags(text: str) -> str:
50
- """Ensures highlight tags are properly balanced"""
51
- if not text or not isinstance(text, str):
52
- return text
53
-
54
- # Define highlight tag patterns
55
- highlight_pairs = [
56
- ('[[start_highlight]]', '[[end_highlight]]'),
57
- ('[[highlight_start]]', '[[highlight_end]]'),
58
- ('<span class="highlight">', '</span>')
59
- ]
60
-
61
- # Check and balance each pair
62
- for start_tag, end_tag in highlight_pairs:
63
- start_count = text.count(start_tag)
64
- end_count = text.count(end_tag)
65
-
66
- # Add missing tags if needed
67
- if start_count > end_count:
68
- text += end_tag * (start_count - end_count)
69
- elif end_count > start_count:
70
- text = start_tag * (end_count - start_count) + text
71
-
72
- return text
73
-
74
- @staticmethod
75
- def balance_quotes(text: str) -> str:
76
- """Ensures quotes are properly balanced"""
77
- if not text or not isinstance(text, str):
78
- return text
79
-
80
- # First, remove escaped quotes from the count
81
- plain_text = text.replace('\\"', '')
82
-
83
- # Count quotes and balance if needed
84
- quote_count = plain_text.count('"')
85
- if quote_count % 2 == 1:
86
- text += '"'
87
-
88
  return text
89
 
90
- @staticmethod
91
- def extract_highlight_parts(text: str) -> List[Tuple[bool, str]]:
92
- """
93
- Extracts highlighted and non-highlighted parts from text, preserving order
94
- """
95
- # Ensure highlight tags are balanced
96
- text = ContextProcessor.balance_highlight_tags(text)
97
-
98
- # Define all highlight patterns
99
- highlight_patterns = [
100
- ('[[start_highlight]]', '[[end_highlight]]'),
101
- ('[[highlight_start]]', '[[highlight_end]]'),
102
- ('<span class="highlight">', '</span>')
103
- ]
104
-
105
- # Collect all highlight sections with their positions
106
- all_highlights = []
107
-
108
- for start_tag, end_tag in highlight_patterns:
109
- # Escape special regex characters if needed
110
- start_esc = re.escape(start_tag)
111
- end_esc = re.escape(end_tag)
112
-
113
- # Find all occurrences of this highlight pattern
114
- for match in re.finditer(f"{start_esc}(.*?){end_esc}", text, re.DOTALL):
115
- all_highlights.append({
116
- 'start': match.start(),
117
- 'end': match.end(),
118
- 'content': match.group(1),
119
- 'start_tag': start_tag,
120
- 'end_tag': end_tag
121
- })
122
-
123
- # If no highlights found, return the whole text as unhighlighted
124
- if not all_highlights:
125
- return [(False, text)]
126
-
127
- # Sort highlights by start position
128
- all_highlights.sort(key=lambda x: x['start'])
129
-
130
- # Build the parts list by processing text portions between and including highlights
131
- parts = []
132
- current_pos = 0
133
-
134
- for highlight in all_highlights:
135
- # Add non-highlighted text before this highlight
136
- if highlight['start'] > current_pos:
137
- parts.append((False, text[current_pos:highlight['start']]))
138
-
139
- # Add the highlighted text
140
- parts.append((True, highlight['content']))
141
-
142
- # Update position to end of this highlight
143
- current_pos = highlight['end']
144
-
145
- # Add any remaining text after the last highlight
146
- if current_pos < len(text):
147
- parts.append((False, text[current_pos:]))
148
-
149
- return parts
150
-
151
- @staticmethod
152
- def is_markdown_table(text: str) -> bool:
153
- """Checks if text looks like a markdown table"""
154
- if not text or not isinstance(text, str):
155
- return False
156
-
157
- if '|' in text and '\n' in text:
158
- lines = text.strip().split('\n')
159
- pipe_lines = sum(1 for line in lines if line.strip().startswith('|'))
160
- return pipe_lines >= 2
161
-
162
- return False
163
-
164
- @staticmethod
165
- def process_cell_content(cell_text: str) -> str:
166
- """Processes a single table cell, handling highlights if present"""
167
- # Clean and prepare the text
168
- cell_text = ContextProcessor.clean_text(cell_text)
169
- cell_text = ContextProcessor.balance_quotes(cell_text)
170
-
171
- # Check if cell has any highlight tags
172
- has_highlights = False
173
- highlight_patterns = [
174
- '[[start_highlight]]', '[[end_highlight]]',
175
- '[[highlight_start]]', '[[highlight_end]]',
176
- '<span class="highlight">', '</span>'
177
- ]
178
-
179
- for pattern in highlight_patterns:
180
- if pattern in cell_text:
181
- has_highlights = True
182
- break
183
-
184
- if has_highlights:
185
- # Extract and process highlight parts
186
- parts = ContextProcessor.extract_highlight_parts(cell_text)
187
-
188
- # Build the result
189
- result = ""
190
- for is_highlighted, part in parts:
191
- if is_highlighted:
192
- result += f'<span class="highlight">{html.escape(part)}</span>'
193
- else:
194
- result += html.escape(part)
195
-
196
- return result
197
- else:
198
- # Just escape HTML in regular cells
199
- return html.escape(cell_text)
200
-
201
- @staticmethod
202
- def convert_table_to_html(text: str) -> str:
203
- """Converts markdown table to HTML with support for highlights in cells"""
204
- # Clean the text
205
- text = ContextProcessor.clean_text(text)
206
-
207
- # Split into lines and get table rows
208
- lines = text.strip().split('\n')
209
- table_lines = [line for line in lines if line.strip().startswith('|')]
210
-
211
- # Check if it's a proper table
212
- if len(table_lines) < 2:
213
- return ContextProcessor.process_text(text)
214
-
215
- # Check if second line is a separator (----)
216
- has_header = False
217
- if len(table_lines) >= 2 and '---' in table_lines[1]:
218
- has_header = True
219
-
220
- # Start building HTML table
221
- html_output = '<table class="md-table">'
222
-
223
- if has_header:
224
- # Process header row
225
- header_line = table_lines[0]
226
- # Split by pipe and remove empty first and last elements
227
- cells = [cell.strip() for cell in header_line.split('|')]
228
- if cells and not cells[0]:
229
- cells.pop(0)
230
- if cells and not cells[-1]:
231
- cells.pop()
232
-
233
- html_output += '<thead><tr>'
234
- for cell in cells:
235
- cell_html = ContextProcessor.process_cell_content(cell)
236
- html_output += f'<th>{cell_html}</th>'
237
- html_output += '</tr></thead>'
238
-
239
- # Process data rows (skip header and separator)
240
- html_output += '<tbody>'
241
- for line in table_lines[2:]:
242
- cells = [cell.strip() for cell in line.split('|')]
243
- if cells and not cells[0]:
244
- cells.pop(0)
245
- if cells and not cells[-1]:
246
- cells.pop()
247
-
248
- html_output += '<tr>'
249
- for cell in cells:
250
- cell_html = ContextProcessor.process_cell_content(cell)
251
- html_output += f'<td>{cell_html}</td>'
252
- html_output += '</tr>'
253
- html_output += '</tbody>'
254
- else:
255
- # All rows are data
256
- html_output += '<tbody>'
257
- for line in table_lines:
258
- cells = [cell.strip() for cell in line.split('|')]
259
- if cells and not cells[0]:
260
- cells.pop(0)
261
- if cells and not cells[-1]:
262
- cells.pop()
263
-
264
- html_output += '<tr>'
265
- for cell in cells:
266
- cell_html = ContextProcessor.process_cell_content(cell)
267
- html_output += f'<td>{cell_html}</td>'
268
- html_output += '</tr>'
269
- html_output += '</tbody>'
270
-
271
- html_output += '</table>'
272
- return html_output
273
 
274
- @staticmethod
275
- def process_text(text: str) -> str:
276
- """Processes text with highlights, handling all edge cases"""
277
- # Clean and prepare the text
278
- text = ContextProcessor.clean_text(text)
279
- text = ContextProcessor.balance_quotes(text)
280
- text = ContextProcessor.balance_highlight_tags(text)
281
-
282
- # Extract and process highlight parts
283
- parts = ContextProcessor.extract_highlight_parts(text)
284
-
285
- # Build the result
286
- result = ""
287
- for is_highlighted, part in parts:
288
- if is_highlighted:
289
- escaped_part = html.escape(part)
290
- result += f'<span class="highlight">{escaped_part}</span>'
291
- else:
292
- result += html.escape(part)
293
-
294
- return result
295
 
296
- @staticmethod
297
- def process_content(content: str, abbreviated_content: Optional[str] = None) -> str:
298
- """Main function to process any kind of content"""
299
- # Handle null/empty content
300
- if not content or not isinstance(content, str):
301
- return ""
302
-
303
- # Special cases that need abbreviated content
304
- special_cases = [
305
- lambda c: c.strip() == "In Oklahoma,",
306
- lambda c: c.strip().startswith('"') and c.count('"') == 1,
307
- lambda c: c.rstrip().endswith('\\'),
308
- lambda c: (c.replace('\\"', '').count('"') % 2) == 1,
309
- lambda c: any((c.count(start) != c.count(end)) for start, end in [
310
- ('[[start_highlight]]', '[[end_highlight]]'),
311
- ('[[highlight_start]]', '[[highlight_end]]'),
312
- ('<span class="highlight">', '</span>')
313
- ])
314
- ]
315
-
316
- # Check if we need to use abbreviated content
317
- needs_abbreviated = any(check(content) for check in special_cases)
318
-
319
- # If content needs help and we have abbreviated content, use it
320
- if needs_abbreviated and abbreviated_content:
321
- # Handle abbreviated content that might be a JSON string
322
- if abbreviated_content.strip().startswith('{') and abbreviated_content.strip().endswith('}'):
323
- try:
324
- data = json.loads(abbreviated_content)
325
- if "abbreviatedContent" in data:
326
- abbreviated_content = data["abbreviatedContent"]
327
- except json.JSONDecodeError:
328
- pass
329
-
330
- # Clean and prepare the abbreviated content
331
- abbreviated_content = ContextProcessor.clean_text(abbreviated_content)
332
- abbreviated_content = ContextProcessor.balance_quotes(abbreviated_content)
333
- abbreviated_content = ContextProcessor.balance_highlight_tags(abbreviated_content)
334
-
335
- # Use abbreviated content instead
336
- content = abbreviated_content
337
-
338
- # Check if content is a markdown table
339
- if ContextProcessor.is_markdown_table(content):
340
- return ContextProcessor.convert_table_to_html(content)
341
- else:
342
- return ContextProcessor.process_text(content)
343
 
344
- @staticmethod
345
- def parse_json_contexts(context_json: str) -> List[Dict[str, Any]]:
346
- """Parses JSON-formatted context data with fallback to regex extraction"""
347
- contexts = []
348
-
349
- # First try standard JSON parsing
350
- try:
351
- contexts = json.loads(context_json)
352
- if not isinstance(contexts, list):
353
- contexts = []
354
- except json.JSONDecodeError:
355
- # If standard parsing fails, use regex to extract the data
356
- try:
357
- # Extract type field
358
- type_pattern = r'"type":\s*"(primary|secondary)"'
359
- types = re.findall(type_pattern, context_json)
360
-
361
- # Extract abbreviatedContent field - more robustly handle quotes
362
- content_pattern = r'"abbreviatedContent":\s*"((?:\\.|[^"])*?)"'
363
- contents = re.findall(content_pattern, context_json)
364
-
365
- # Build context objects
366
- for i, (ctx_type, content) in enumerate(zip(types, contents)):
367
- contexts.append({
368
- 'type': ctx_type,
369
- 'abbreviatedContent': content.replace('\\"', '"')
370
- })
371
- except Exception as e:
372
- print(f"Error extracting contexts with regex: {e}")
373
-
374
- return contexts
375
 
376
- @staticmethod
377
- def process_json_contexts(context_json: str) -> List[Dict[str, Any]]:
378
- """Process JSON-formatted highlighted contexts"""
379
- processed_contexts = []
380
-
381
- try:
382
- # Parse the JSON contexts
383
- contexts = ContextProcessor.parse_json_contexts(context_json)
384
-
385
- # Process each context item
386
- for i, item in enumerate(contexts):
387
- if isinstance(item, dict):
388
- context_type = item.get('type', 'secondary')
389
- content = item.get('abbreviatedContent', '')
390
-
391
- # Process the content
392
- processed_content = ContextProcessor.process_content(content)
393
-
394
- # Create processed context item
395
- processed_contexts.append({
396
- 'chunk_num': i + 1,
397
- 'content': processed_content,
398
- 'is_primary': context_type == 'primary'
399
- })
400
- except Exception as e:
401
- print(f"Error processing JSON contexts: {e}")
402
-
403
- return processed_contexts
404
-
405
-
406
- # Module-level functions for backward compatibility
407
- def clean_text(text):
408
- return ContextProcessor.clean_text(text)
409
-
410
- def balance_highlight_tags(text):
411
- return ContextProcessor.balance_highlight_tags(text)
412
-
413
- def balance_quotes(text):
414
- return ContextProcessor.balance_quotes(text)
415
-
416
- def extract_highlight_parts(text):
417
- return ContextProcessor.extract_highlight_parts(text)
418
-
419
- def is_markdown_table(text):
420
- return ContextProcessor.is_markdown_table(text)
421
-
422
- def process_cell_content(cell_text):
423
- return ContextProcessor.process_cell_content(cell_text)
424
-
425
- def convert_table_to_html(text):
426
- return ContextProcessor.convert_table_to_html(text)
427
-
428
- def process_text(text):
429
- return ContextProcessor.process_text(text)
430
-
431
- def process_content(content, abbreviated_content=None):
432
- return ContextProcessor.process_content(content, abbreviated_content)
433
-
434
- def process_highlights(text):
435
- """Main entry point called from data_loader.py"""
436
- return ContextProcessor.process_content(text)
437
 
438
  def get_context_html(example, show_full=False):
439
- """Format context chunks into HTML for display"""
440
  html_output = ""
441
 
442
  # Process insufficient context warning if needed
443
  if example.get("insufficient", False):
444
  insufficient_reason = example.get("insufficient_reason", "")
445
- reason_html = (
446
- f"<p>{insufficient_reason}</p>" if insufficient_reason else
447
- "<p>The context may not contain enough information to fully answer the question, "
448
- "or the question might be ambiguous. Models should ideally indicate this limitation "
449
- "or refuse to answer.</p>"
450
- )
451
 
452
  html_output += f"""
453
  <div class="insufficient-alert">
454
  <strong>
455
- <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none"
456
- stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"
457
- style="vertical-align: middle; margin-right: 5px;">
458
  <path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3Z"></path>
459
  <line x1="12" y1="9" x2="12" y2="13"></line>
460
  <line x1="12" y1="17" x2="12.01" y2="17"></line>
@@ -467,40 +58,39 @@ def get_context_html(example, show_full=False):
467
 
468
  html_output += '<div class="context-items-container">'
469
 
470
- # Display full contexts if requested
471
- if show_full and "full_contexts" in example and example["full_contexts"]:
472
- for context_item in example["full_contexts"]:
473
- content = context_item.get('content', '')
474
- abbreviated = context_item.get('abbreviatedContent', None)
475
-
476
- # Process the content
477
- processed = ContextProcessor.process_content(content, abbreviated)
478
-
479
- html_output += f'<div class="context-item">{processed}</div>'
 
 
 
 
 
 
 
480
  else:
481
- # Display regular contexts if available
482
  if "contexts" in example and example["contexts"]:
483
  for context_item in example["contexts"]:
484
- content = context_item.get('content', '')
485
- abbreviated = context_item.get('abbreviatedContent', None)
486
-
487
- # Process the content
488
- processed = ContextProcessor.process_content(content, abbreviated)
489
-
490
- is_primary = context_item.get('is_primary', False)
491
- extra_class = " primary-context" if is_primary else ""
492
-
493
- html_output += f'<div class="context-item{extra_class}">{processed}</div>'
494
-
495
- # Or process JSON-structured highlighted contexts
496
- elif "contexts_highlighted" in example and example["contexts_highlighted"]:
497
- processed_contexts = ContextProcessor.process_json_contexts(example["contexts_highlighted"])
498
-
499
- for context_item in processed_contexts:
500
- is_primary = context_item.get('is_primary', False)
501
- extra_class = " primary-context" if is_primary else ""
502
-
503
- html_output += f'<div class="context-item{extra_class}">{context_item["content"]}</div>'
504
  else:
505
  html_output += '<div class="context-item">No context available. Try toggling to full context view.</div>'
506
 
 
1
  import re
2
  import html
3
  import json
 
4
 
5
+ def clean_text(text):
6
+ """Clean text with common issues like HTML entities and escaped quotes."""
7
+ if not text or not isinstance(text, str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  return text
9
 
10
+ # Fix incomplete HTML entities
11
+ incomplete_entities = {'&#x27': '&#x27;', '&quot': '&quot;', '&lt': '&lt;', '&gt': '&gt;', '&amp': '&amp;'}
12
+ for incomplete, complete in incomplete_entities.items():
13
+ text = re.sub(f"{re.escape(incomplete)}(?!;)", complete, text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ # Convert HTML entities to characters
16
+ try:
17
+ text = html.unescape(text)
18
+ except Exception:
19
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ # Handle escaped quotes and other special characters
22
+ replacements = {
23
+ r'\"': '"', r"\'": "'", r"\n": "\n", r"\t": "\t", r"\\": "\\",
24
+ # Also normalize fancy quotes
25
+ '“': '"', '”': '"', '‘': "'", '’': "'", '`': "'", '´': "'"
26
+ }
27
+ for pattern, replacement in replacements.items():
28
+ text = text.replace(pattern, replacement)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
+ # Remove trailing backslash if present
31
+ if text.rstrip().endswith('\\'):
32
+ text = text.rstrip().rstrip('\\')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  def get_context_html(example, show_full=False):
37
+ """Format context chunks into HTML for display."""
38
  html_output = ""
39
 
40
  # Process insufficient context warning if needed
41
  if example.get("insufficient", False):
42
  insufficient_reason = example.get("insufficient_reason", "")
43
+ reason_html = f"<p>{insufficient_reason}</p>" if insufficient_reason else "<p>The context may not contain enough information to fully answer the question, or the question might be ambiguous. Models should ideally indicate this limitation or refuse to answer.</p>"
 
 
 
 
 
44
 
45
  html_output += f"""
46
  <div class="insufficient-alert">
47
  <strong>
48
+ <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align: middle; margin-right: 5px;">
 
 
49
  <path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3Z"></path>
50
  <line x1="12" y1="9" x2="12" y2="13"></line>
51
  <line x1="12" y1="17" x2="12.01" y2="17"></line>
 
58
 
59
  html_output += '<div class="context-items-container">'
60
 
61
+ # Display full contexts or highlighted contexts based on toggle
62
+ if show_full:
63
+ # Show full context - directly use the strings from the list in full_contexts
64
+ if "full_contexts" in example and example["full_contexts"]:
65
+ for context_item in example["full_contexts"]:
66
+ if isinstance(context_item, dict) and 'content' in context_item:
67
+ content = context_item.get('content', '')
68
+ elif isinstance(context_item, str):
69
+ content = context_item
70
+ else:
71
+ content = str(context_item)
72
+
73
+ # Escape HTML entities for safe display
74
+ escaped_content = html.escape(content)
75
+
76
+ # Create the context item box - no headers
77
+ html_output += f'<div class="context-item">{escaped_content}</div>'
78
  else:
79
+ # Show highlighted contexts
80
  if "contexts" in example and example["contexts"]:
81
  for context_item in example["contexts"]:
82
+ if isinstance(context_item, dict):
83
+ content = context_item.get('content', '')
84
+ is_primary = context_item.get('is_primary', False)
85
+
86
+ # Extra class for primary context styling
87
+ extra_class = " primary-context" if is_primary else ""
88
+
89
+ # Use content directly as it already has HTML highlighting
90
+ html_output += f'<div class="context-item{extra_class}">{content}</div>'
91
+ elif isinstance(context_item, str):
92
+ # For direct string contexts
93
+ html_output += f'<div class="context-item">{context_item}</div>'
 
 
 
 
 
 
 
 
94
  else:
95
  html_output += '<div class="context-item">No context available. Try toggling to full context view.</div>'
96
 
utils/data_loader.py CHANGED
@@ -3,7 +3,6 @@ import json
3
  import pandas as pd
4
  import random
5
  import re
6
- from .context_processor import process_highlights
7
 
8
  # Global data store - loaded once at import time
9
  _ARENA_DATA = None
@@ -40,10 +39,11 @@ def create_dummy_example():
40
  return {
41
  "question": "Could not load questions from the dataset. Please check the data file.",
42
  "processed_context_desc": "Error: Data not available",
43
- "contexts": ["No context available"],
44
- "full_context": "Error loading context data.",
45
  "Answerable": False,
46
- "insufficient": True
 
47
  }
48
 
49
  def get_random_example():
@@ -64,102 +64,113 @@ def get_random_example():
64
  # Process the example data
65
  processed_example = {
66
  "question": example['question'],
67
- "processed_context_desc": example.get('processed_context_desc', ''),
68
- "Answerable": example.get('Answerable', True), # Default to True unless specified otherwise
69
  "insufficient": example.get('insufficient', False),
70
- "insufficient_reason": example.get('insufficient_reason', '')
 
71
  }
72
 
73
- # Process contexts - for full context
 
 
 
 
 
 
 
 
74
  try:
75
- contexts_raw = example['contexts']
76
- if isinstance(contexts_raw, str):
77
- contexts = json.loads(contexts_raw)
78
- # Store full contexts as individual items
79
- full_contexts = []
80
- if isinstance(contexts, list):
81
- for i, chunk in enumerate(contexts):
82
- if isinstance(chunk, dict) and 'content' in chunk:
83
- full_contexts.append({
84
- 'chunk_num': i + 1,
85
- 'content': chunk.get('content', '')
86
- })
87
- processed_example["full_contexts"] = full_contexts
88
- else:
89
- processed_example["full_contexts"] = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  except Exception as e:
91
- print(f"Error processing contexts: {e}")
92
- processed_example["full_contexts"] = []
93
 
94
- # Process highlighted contexts for display
95
  contexts_highlighted = []
96
-
97
  try:
98
- # Check if contexts_highlighted exists
99
  if 'contexts_highlighted' in example and example['contexts_highlighted']:
100
- highlighted_contexts = []
101
 
102
- if isinstance(example['contexts_highlighted'], str):
103
  try:
104
- # Try direct JSON parsing first
105
- raw_str = example['contexts_highlighted']
106
-
107
- # First, manually parse the highlighted contexts using regex
108
- # This is a more robust approach for our specific format
109
- type_pattern = r'"type":\s*"(primary|secondary)"'
110
- content_pattern = r'"abbreviatedContent":\s*"([^"]*)"|"abbreviatedContent":\s*"([^"]*)'
111
-
112
- types = re.findall(type_pattern, raw_str)
113
- # Handle both regular quotes and escaped quotes in content
114
- raw_contents = re.findall(content_pattern, raw_str)
115
 
116
- # Extract contents from tuple matches (the regex has capture groups)
117
- contents = []
118
- for match in raw_contents:
119
- # Get the non-empty string from the tuple
120
- content = next((s for s in match if s), "")
121
- contents.append(content)
122
-
123
- # Create the highlighted contexts from extracted data
124
- for i, (ctx_type, content) in enumerate(zip(types, contents)):
125
- highlighted_contexts.append({
126
- 'type': ctx_type,
127
- 'abbreviatedContent': content
128
- })
 
 
 
 
 
 
129
 
130
- except Exception as e:
131
- print(f"Error extracting contexts with regex: {e}")
132
- else:
133
- # Already an object, not a string
134
- highlighted_contexts = example['contexts_highlighted']
135
-
136
- # Process each context item
137
- for i, item in enumerate(highlighted_contexts):
138
- if isinstance(item, dict):
139
- ctx_type = item.get('type', 'secondary')
140
- content = item.get('abbreviatedContent', '')
141
-
142
- # Process highlights using the standard format
143
- content = process_highlights(content)
144
-
145
- contexts_highlighted.append({
146
- 'chunk_num': i + 1,
147
- 'content': content,
148
- 'is_primary': ctx_type == 'primary'
149
- })
150
  except Exception as e:
151
  print(f"Error processing highlighted contexts: {e}")
152
 
153
- # If we couldn't process the highlighted contexts, fall back to the full contexts
154
- if not contexts_highlighted and processed_example["full_contexts"]:
155
- for i, ctx in enumerate(processed_example["full_contexts"]):
156
  contexts_highlighted.append({
157
- 'chunk_num': i + 1,
158
- 'content': ctx.get('content', ''),
159
- 'is_primary': False
160
  })
161
 
162
  processed_example["contexts"] = contexts_highlighted
 
163
 
164
  return processed_example
165
 
 
3
  import pandas as pd
4
  import random
5
  import re
 
6
 
7
  # Global data store - loaded once at import time
8
  _ARENA_DATA = None
 
39
  return {
40
  "question": "Could not load questions from the dataset. Please check the data file.",
41
  "processed_context_desc": "Error: Data not available",
42
+ "contexts": [],
43
+ "full_contexts": [],
44
  "Answerable": False,
45
+ "insufficient": True,
46
+ "insufficient_reason": "Data loading error"
47
  }
48
 
49
  def get_random_example():
 
64
  # Process the example data
65
  processed_example = {
66
  "question": example['question'],
67
+ "Answerable": not example.get('insufficient', False),
 
68
  "insufficient": example.get('insufficient', False),
69
+ "insufficient_reason": example.get('insufficient_reason', ''),
70
+ "sample_id": example.get('sample_id', 0)
71
  }
72
 
73
+ # Process the context description - ensure it's a non-empty string
74
+ context_desc = example.get('processed_context_desc', '')
75
+ if pd.isna(context_desc):
76
+ context_desc = ""
77
+ # Add the description to the processed example
78
+ processed_example["processed_context_desc"] = context_desc
79
+
80
+ # Process full contexts - from the 'contexts' column
81
+ full_contexts = []
82
  try:
83
+ if 'contexts' in example and example['contexts']:
84
+ # Try to parse contexts as JSON if it's a string
85
+ contexts_str = example['contexts']
86
+
87
+ if isinstance(contexts_str, str):
88
+ # Try to parse as list literal first (for Python list representation)
89
+ if contexts_str.strip().startswith('[') and contexts_str.strip().endswith(']'):
90
+ try:
91
+ # This is for handling Python list literals like "['string1', 'string2']"
92
+ import ast
93
+ contexts_list = ast.literal_eval(contexts_str)
94
+
95
+ # Process each context string in the list
96
+ for ctx in contexts_list:
97
+ full_contexts.append(ctx)
98
+ except (SyntaxError, ValueError) as e:
99
+ # If ast.literal_eval fails, try JSON
100
+ try:
101
+ contexts_list = json.loads(contexts_str)
102
+
103
+ # Process each context in the list
104
+ for ctx in contexts_list:
105
+ if isinstance(ctx, str):
106
+ full_contexts.append(ctx)
107
+ elif isinstance(ctx, dict) and 'content' in ctx:
108
+ full_contexts.append(ctx.get('content', ''))
109
+ except json.JSONDecodeError:
110
+ # Not valid JSON, treat as a single context
111
+ full_contexts.append(contexts_str)
112
+ else:
113
+ # Single context string (not JSON array or list literal)
114
+ full_contexts.append(contexts_str)
115
+ elif isinstance(contexts_str, list):
116
+ # Already a list, process directly
117
+ for ctx in contexts_str:
118
+ if isinstance(ctx, str):
119
+ full_contexts.append(ctx)
120
+ elif isinstance(ctx, dict) and 'content' in ctx:
121
+ full_contexts.append(ctx.get('content', ''))
122
  except Exception as e:
123
+ print(f"Error processing full contexts: {e}")
 
124
 
125
+ # Process highlighted contexts - from contexts_highlighted column
126
  contexts_highlighted = []
 
127
  try:
128
+ # Process contexts_highlighted - this is stored as a string in CSV
129
  if 'contexts_highlighted' in example and example['contexts_highlighted']:
130
+ highlights_str = example['contexts_highlighted']
131
 
132
+ if isinstance(highlights_str, str):
133
  try:
134
+ # Try to parse as JSON array
135
+ highlights_list = json.loads(highlights_str)
 
 
 
 
 
 
 
 
 
136
 
137
+ # Process each highlighted context
138
+ for i, ctx in enumerate(highlights_list):
139
+ if isinstance(ctx, dict):
140
+ ctx_type = ctx.get('type', 'secondary')
141
+ content = ctx.get('abbreviatedContent', '')
142
+
143
+ # The content already has HTML span tags for highlights
144
+ contexts_highlighted.append({
145
+ 'is_primary': ctx_type == 'primary',
146
+ 'content': content
147
+ })
148
+ except json.JSONDecodeError:
149
+ print(f"Error parsing contexts_highlighted JSON: {highlights_str[:100]}...")
150
+ elif isinstance(highlights_str, list):
151
+ # Already a list, process directly
152
+ for ctx in highlights_str:
153
+ if isinstance(ctx, dict):
154
+ ctx_type = ctx.get('type', 'secondary')
155
+ content = ctx.get('abbreviatedContent', '')
156
 
157
+ contexts_highlighted.append({
158
+ 'is_primary': ctx_type == 'primary',
159
+ 'content': content
160
+ })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  except Exception as e:
162
  print(f"Error processing highlighted contexts: {e}")
163
 
164
+ # Make sure we have the highlighted contexts populated even if there are no contexts_highlighted
165
+ if not contexts_highlighted and full_contexts:
166
+ for content in full_contexts:
167
  contexts_highlighted.append({
168
+ 'is_primary': False,
169
+ 'content': content
 
170
  })
171
 
172
  processed_example["contexts"] = contexts_highlighted
173
+ processed_example["full_contexts"] = full_contexts
174
 
175
  return processed_example
176