SLM-RAG-Arena

Running on Zero

App Files Files Community

oliver-aizip committed on May 8

Commit

347797e

1 Parent(s): e7fd2d9

update data pipeline

Browse files

Files changed (4) hide show

app.py +61 -32
utils/arena_df.csv +0 -0
utils/context_processor.py +57 -467
utils/data_loader.py +91 -80

app.py CHANGED Viewed

@@ -37,6 +37,7 @@ def load_context(set_interrupt=False):
     generation_interrupt.clear()
     example = get_random_example()
     context_desc = example.get('processed_context_desc', '')
     if context_desc:
         context_desc = f"<div class='context-topic'><span class='topic-label'>The question and context are about:</span> {context_desc}</div>"
@@ -75,14 +76,17 @@ def generate_model_summaries(example):
     try:
         m_a_name, m_b_name = random.sample(model_names, 2)
         s_a, s_b = generate_summaries(example, m_a_name, m_b_name)
         if not generation_interrupt.is_set():
-            result["model_a"] = m_a_name
-            result["model_b"] = m_b_name
             result["summary_a"] = s_a
             result["summary_b"] = s_b
-            result["completed"] = True
     except Exception as e:
         print(f"Error in generation: {e}")
@@ -90,16 +94,20 @@ def generate_model_summaries(example):
 def process_generation_result(result):
     """Process the results from the generation function"""
-    if not result["completed"]:
-        # Generation was interrupted or failed
         return [
-            "", "", "", "", None, [], False, load_leaderboard_data(),
-            gr.update(value="Generation was interrupted or failed. Please try again."),
-            gr.update(value="Generation was interrupted or failed. Please try again."),
-            gr.update(interactive=True, elem_classes=["vote-button"]),
-            gr.update(interactive=True, elem_classes=["vote-button"]),
-            gr.update(interactive=True, elem_classes=["vote-button"]),
-            gr.update(interactive=True, elem_classes=["vote-button", "vote-button-neither"]),
             gr.update(choices=[], value=[], interactive=False, visible=False),
             gr.update(visible=False),
             gr.update(interactive=False, visible=True),
@@ -108,6 +116,9 @@ def process_generation_result(result):
             gr.update(elem_classes=[])
         ]
     # Generation completed successfully
     agg_results = load_leaderboard_data()
     return [
@@ -116,10 +127,10 @@ def process_generation_result(result):
         None, [], False, agg_results,
         gr.update(value=result["summary_a"]),
         gr.update(value=result["summary_b"]),
-        gr.update(interactive=True, elem_classes=["vote-button"]),
-        gr.update(interactive=True, elem_classes=["vote-button"]),
-        gr.update(interactive=True, elem_classes=["vote-button"]),
-        gr.update(interactive=True, elem_classes=["vote-button", "vote-button-neither"]),
         gr.update(choices=[], value=[], interactive=False, visible=False),
         gr.update(visible=False),
         gr.update(interactive=False, visible=True),
@@ -178,10 +189,10 @@ def show_loading_state():
     return [
         gr.update(value="Loading new question and summaries...", interactive=False),
         gr.update(value="Loading new question and summaries...", interactive=False),
-        gr.update(interactive=False),
-        gr.update(interactive=False),
-        gr.update(interactive=False),
-        gr.update(interactive=False)
     ]
 def handle_new_example_click():
@@ -191,9 +202,14 @@ def handle_new_example_click():
 def update_ui_for_new_context(example):
     """Update UI with new context information"""
     return [
         gr.update(value=example['question']),
-        gr.update(value=example.get('processed_context_desc', ''), visible=bool(example.get('processed_context_desc', ''))),
         gr.update(value=get_context_html(example, False)),
         gr.update(value="Show Full Context", elem_classes=["context-toggle-button"]),
         False
@@ -204,6 +220,8 @@ def cleanup_on_disconnect():
     """Clean up resources when browser disconnects"""
     print(f"Browser disconnected. Cleaning up resources...")
     generation_interrupt.set()
 # Create Gradio interface
 with gr.Blocks(theme=gr.themes.Default(
@@ -213,11 +231,11 @@ with gr.Blocks(theme=gr.themes.Default(
     # Load CSS
     css_path = os.path.join(os.getcwd(), 'static', 'styles.css')
-    # Load the files
     with open(css_path, 'r') as f:
         css_content = f.read()
-    # Create HTML components with CSS and JavaScript links
     gr.HTML(f"<style>{css_content}</style>")
     # Add JavaScript to handle browser unload events
@@ -309,10 +327,10 @@ with gr.Blocks(theme=gr.themes.Default(
                 # Voting section
                 gr.Markdown("### 🏅 Cast Your Vote", elem_classes="section-heading")
                 with gr.Row():
-                    vote_button_a = gr.Button("⬅️ Summary A is Better", elem_classes=["vote-button"])
-                    vote_button_tie = gr.Button("🤝 Tie / Equally Good", elem_classes=["vote-button"])
-                    vote_button_b = gr.Button("➡️ Summary B is Better", elem_classes=["vote-button"])
-                    vote_button_neither = gr.Button("❌ Neither is Good", elem_classes=["vote-button", "vote-button-neither"])
                 # Feedback and Submit sections
                 with gr.Group(elem_classes=["feedback-section"], visible=False) as feedback_section:
@@ -388,15 +406,17 @@ The Elo rating system provides a more accurate ranking than simple win rates:
         outputs=[results_table_display]
     )
-    # Random Question and Try Another buttons with interruption
     for btn in [random_question_btn, try_another_btn]:
-        btn.click(
-            fn=show_loading_state,  # First show loading state
             inputs=[],
             outputs=[summary_a_display, summary_b_display, vote_button_a,
                     vote_button_b, vote_button_tie, vote_button_neither]
         ).then(
-            fn=handle_new_example_click,  # Now uses the centralized approach
             inputs=[],
             outputs=[current_example]
         ).then(
@@ -404,9 +424,18 @@ The Elo rating system provides a more accurate ranking than simple win rates:
             inputs=[current_example],
             outputs=[query_display, context_description, context_display,
                     context_toggle_btn, show_full_context]
         ).then(
             fn=process_example,
-            inputs=[current_example],
             outputs=[model_a_name, model_b_name, summary_a_text, summary_b_text,
                     selected_winner, feedback_list, show_results_state, results_agg,
                     summary_a_display, summary_b_display, vote_button_a, vote_button_b,

     generation_interrupt.clear()
     example = get_random_example()
+    # Format the context description
     context_desc = example.get('processed_context_desc', '')
     if context_desc:
         context_desc = f"<div class='context-topic'><span class='topic-label'>The question and context are about:</span> {context_desc}</div>"
     try:
         m_a_name, m_b_name = random.sample(model_names, 2)
+        # Track the partial completion state
+        result["model_a"] = m_a_name
+        result["model_b"] = m_b_name
         s_a, s_b = generate_summaries(example, m_a_name, m_b_name)
         if not generation_interrupt.is_set():
             result["summary_a"] = s_a
             result["summary_b"] = s_b
+            result["completed"] = bool(s_a and s_b)  # Only mark complete if both have content
     except Exception as e:
         print(f"Error in generation: {e}")
 def process_generation_result(result):
     """Process the results from the generation function"""
+    if not result["completed"] or not result["summary_a"] or not result["summary_b"]:
+        # Either generation was interrupted or both summaries aren't ready
         return [
+            result.get("model_a", ""),
+            result.get("model_b", ""),
+            result.get("summary_a", ""),
+            result.get("summary_b", ""),
+            None, [], False, load_leaderboard_data(),
+            gr.update(value=result.get("summary_a", "Generation was interrupted or failed.")),
+            gr.update(value=result.get("summary_b", "Generation was interrupted or failed.")),
+            gr.update(interactive=False, elem_classes=["vote-button"]),  # Explicitly disable
+            gr.update(interactive=False, elem_classes=["vote-button"]),
+            gr.update(interactive=False, elem_classes=["vote-button"]),
+            gr.update(interactive=False, elem_classes=["vote-button", "vote-button-neither"]),
             gr.update(choices=[], value=[], interactive=False, visible=False),
             gr.update(visible=False),
             gr.update(interactive=False, visible=True),
             gr.update(elem_classes=[])
         ]
+    # Only enable voting when both summaries are complete and non-empty
+    buttons_interactive = bool(result["summary_a"] and result["summary_b"])
     # Generation completed successfully
     agg_results = load_leaderboard_data()
     return [
         None, [], False, agg_results,
         gr.update(value=result["summary_a"]),
         gr.update(value=result["summary_b"]),
+        gr.update(interactive=buttons_interactive, elem_classes=["vote-button"]),
+        gr.update(interactive=buttons_interactive, elem_classes=["vote-button"]),
+        gr.update(interactive=buttons_interactive, elem_classes=["vote-button"]),
+        gr.update(interactive=buttons_interactive, elem_classes=["vote-button", "vote-button-neither"]),
         gr.update(choices=[], value=[], interactive=False, visible=False),
         gr.update(visible=False),
         gr.update(interactive=False, visible=True),
     return [
         gr.update(value="Loading new question and summaries...", interactive=False),
         gr.update(value="Loading new question and summaries...", interactive=False),
+        gr.update(interactive=False),  # For vote_button_a
+        gr.update(interactive=False),  # For vote_button_b
+        gr.update(interactive=False),  # For vote_button_tie
+        gr.update(interactive=False)   # For vote_button_neither
     ]
 def handle_new_example_click():
 def update_ui_for_new_context(example):
     """Update UI with new context information"""
+    # Format the context description
+    context_desc = example.get('processed_context_desc', '')
+    if context_desc:
+        context_desc = f"<div class='context-topic'><span class='topic-label'>The question and context are about:</span> {context_desc}</div>"
     return [
         gr.update(value=example['question']),
+        gr.update(value=context_desc, visible=bool(context_desc)),
         gr.update(value=get_context_html(example, False)),
         gr.update(value="Show Full Context", elem_classes=["context-toggle-button"]),
         False
     """Clean up resources when browser disconnects"""
     print(f"Browser disconnected. Cleaning up resources...")
     generation_interrupt.set()
+    # No need for time.sleep here as this is just setting the flag
+    # Threads will detect it on their next check
 # Create Gradio interface
 with gr.Blocks(theme=gr.themes.Default(
     # Load CSS
     css_path = os.path.join(os.getcwd(), 'static', 'styles.css')
+    # Load the CSS file
     with open(css_path, 'r') as f:
         css_content = f.read()
+    # Create HTML components with CSS
     gr.HTML(f"<style>{css_content}</style>")
     # Add JavaScript to handle browser unload events
                 # Voting section
                 gr.Markdown("### 🏅 Cast Your Vote", elem_classes="section-heading")
                 with gr.Row():
+                    vote_button_a = gr.Button("⬅️ Summary A is Better", elem_classes=["vote-button"], interactive=False)
+                    vote_button_tie = gr.Button("🤝 Tie / Equally Good", elem_classes=["vote-button"], interactive=False)
+                    vote_button_b = gr.Button("➡️ Summary B is Better", elem_classes=["vote-button"], interactive=False)
+                    vote_button_neither = gr.Button("❌ Neither is Good", elem_classes=["vote-button", "vote-button-neither"], interactive=False)
                 # Feedback and Submit sections
                 with gr.Group(elem_classes=["feedback-section"], visible=False) as feedback_section:
         outputs=[results_table_display]
     )
+    # Alternative approach: use two separate clicks for each button
+    # First click event: Update UI immediately
     for btn in [random_question_btn, try_another_btn]:
+        # Handle UI updates first
+        event1 = btn.click(
+            fn=show_loading_state,
             inputs=[],
             outputs=[summary_a_display, summary_b_display, vote_button_a,
                     vote_button_b, vote_button_tie, vote_button_neither]
         ).then(
+            fn=handle_new_example_click,
             inputs=[],
             outputs=[current_example]
         ).then(
             inputs=[current_example],
             outputs=[query_display, context_description, context_display,
                     context_toggle_btn, show_full_context]
+        )
+    # Second click event for each button runs in parallel with the first
+    for btn in [random_question_btn, try_another_btn]:
+        # Generate model outputs (potentially slower operation)
+        event2 = btn.click(
+            fn=handle_new_example_click,  # This will be called separately from the first event
+            inputs=[],
+            outputs=[current_example]
         ).then(
             fn=process_example,
+            inputs=[current_example],
             outputs=[model_a_name, model_b_name, summary_a_text, summary_b_text,
                     selected_winner, feedback_list, show_results_state, results_agg,
                     summary_a_display, summary_b_display, vote_button_a, vote_button_b,

utils/arena_df.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

utils/context_processor.py CHANGED Viewed

@@ -1,460 +1,51 @@
 import re
 import html
 import json
-from typing import Dict, List, Tuple, Optional, Any, Union
-class ContextProcessor:
-    """Processes highlighted contexts for the RAG Summarizer Arena"""
-    # Common HTML entities that might be incomplete
-    INCOMPLETE_ENTITIES = {
-        '&#x27': '&#x27;',
-        '&quot': '&quot;',
-        '&lt': '&lt;',
-        '&gt': '&gt;',
-        '&amp': '&amp;'
-    }
-    @staticmethod
-    def clean_text(text: str) -> str:
-        """Cleans text by fixing HTML entities and handling escaped characters"""
-        if not text or not isinstance(text, str):
-            return text
-        # Fix incomplete HTML entities
-        for incomplete, complete in ContextProcessor.INCOMPLETE_ENTITIES.items():
-            text = re.sub(f"{re.escape(incomplete)}(?!;)", complete, text)
-        # Convert HTML entities to characters
-        try:
-            text = html.unescape(text)
-        except Exception:
-            pass
-        # Handle escaped quotes and special characters
-        replacements = {
-            r'\"': '"', r"\'": "'", r"\n": "\n", r"\t": "\t", r"\\": "\\",
-            '"': '"', '"': '"', ''': "'", ''': "'", '`': "'", '´': "'"
-        }
-        for pattern, replacement in replacements.items():
-            text = text.replace(pattern, replacement)
-        # Remove trailing backslash if present
-        if text.rstrip().endswith('\\'):
-            text = text.rstrip().rstrip('\\')
-        return text
-    @staticmethod
-    def balance_highlight_tags(text: str) -> str:
-        """Ensures highlight tags are properly balanced"""
-        if not text or not isinstance(text, str):
-            return text
-        # Define highlight tag patterns
-        highlight_pairs = [
-            ('[[start_highlight]]', '[[end_highlight]]'),
-            ('[[highlight_start]]', '[[highlight_end]]'),
-            ('<span class="highlight">', '</span>')
-        ]
-        # Check and balance each pair
-        for start_tag, end_tag in highlight_pairs:
-            start_count = text.count(start_tag)
-            end_count = text.count(end_tag)
-            # Add missing tags if needed
-            if start_count > end_count:
-                text += end_tag * (start_count - end_count)
-            elif end_count > start_count:
-                text = start_tag * (end_count - start_count) + text
-        return text
-    @staticmethod
-    def balance_quotes(text: str) -> str:
-        """Ensures quotes are properly balanced"""
-        if not text or not isinstance(text, str):
-            return text
-        # First, remove escaped quotes from the count
-        plain_text = text.replace('\\"', '')
-        # Count quotes and balance if needed
-        quote_count = plain_text.count('"')
-        if quote_count % 2 == 1:
-            text += '"'
         return text
-    @staticmethod
-    def extract_highlight_parts(text: str) -> List[Tuple[bool, str]]:
-        """
-        Extracts highlighted and non-highlighted parts from text, preserving order
-        """
-        # Ensure highlight tags are balanced
-        text = ContextProcessor.balance_highlight_tags(text)
-        # Define all highlight patterns
-        highlight_patterns = [
-            ('[[start_highlight]]', '[[end_highlight]]'),
-            ('[[highlight_start]]', '[[highlight_end]]'),
-            ('<span class="highlight">', '</span>')
-        ]
-        # Collect all highlight sections with their positions
-        all_highlights = []
-        for start_tag, end_tag in highlight_patterns:
-            # Escape special regex characters if needed
-            start_esc = re.escape(start_tag)
-            end_esc = re.escape(end_tag)
-            # Find all occurrences of this highlight pattern
-            for match in re.finditer(f"{start_esc}(.*?){end_esc}", text, re.DOTALL):
-                all_highlights.append({
-                    'start': match.start(),
-                    'end': match.end(),
-                    'content': match.group(1),
-                    'start_tag': start_tag,
-                    'end_tag': end_tag
-                })
-        # If no highlights found, return the whole text as unhighlighted
-        if not all_highlights:
-            return [(False, text)]
-        # Sort highlights by start position
-        all_highlights.sort(key=lambda x: x['start'])
-        # Build the parts list by processing text portions between and including highlights
-        parts = []
-        current_pos = 0
-        for highlight in all_highlights:
-            # Add non-highlighted text before this highlight
-            if highlight['start'] > current_pos:
-                parts.append((False, text[current_pos:highlight['start']]))
-            # Add the highlighted text
-            parts.append((True, highlight['content']))
-            # Update position to end of this highlight
-            current_pos = highlight['end']
-        # Add any remaining text after the last highlight
-        if current_pos < len(text):
-            parts.append((False, text[current_pos:]))
-        return parts
-    @staticmethod
-    def is_markdown_table(text: str) -> bool:
-        """Checks if text looks like a markdown table"""
-        if not text or not isinstance(text, str):
-            return False
-        if '|' in text and '\n' in text:
-            lines = text.strip().split('\n')
-            pipe_lines = sum(1 for line in lines if line.strip().startswith('|'))
-            return pipe_lines >= 2
-        return False
-    @staticmethod
-    def process_cell_content(cell_text: str) -> str:
-        """Processes a single table cell, handling highlights if present"""
-        # Clean and prepare the text
-        cell_text = ContextProcessor.clean_text(cell_text)
-        cell_text = ContextProcessor.balance_quotes(cell_text)
-        # Check if cell has any highlight tags
-        has_highlights = False
-        highlight_patterns = [
-            '[[start_highlight]]', '[[end_highlight]]',
-            '[[highlight_start]]', '[[highlight_end]]',
-            '<span class="highlight">', '</span>'
-        ]
-        for pattern in highlight_patterns:
-            if pattern in cell_text:
-                has_highlights = True
-                break
-        if has_highlights:
-            # Extract and process highlight parts
-            parts = ContextProcessor.extract_highlight_parts(cell_text)
-            # Build the result
-            result = ""
-            for is_highlighted, part in parts:
-                if is_highlighted:
-                    result += f'<span class="highlight">{html.escape(part)}</span>'
-                else:
-                    result += html.escape(part)
-            return result
-        else:
-            # Just escape HTML in regular cells
-            return html.escape(cell_text)
-    @staticmethod
-    def convert_table_to_html(text: str) -> str:
-        """Converts markdown table to HTML with support for highlights in cells"""
-        # Clean the text
-        text = ContextProcessor.clean_text(text)
-        # Split into lines and get table rows
-        lines = text.strip().split('\n')
-        table_lines = [line for line in lines if line.strip().startswith('|')]
-        # Check if it's a proper table
-        if len(table_lines) < 2:
-            return ContextProcessor.process_text(text)
-        # Check if second line is a separator (----)
-        has_header = False
-        if len(table_lines) >= 2 and '---' in table_lines[1]:
-            has_header = True
-        # Start building HTML table
-        html_output = '<table class="md-table">'
-        if has_header:
-            # Process header row
-            header_line = table_lines[0]
-            # Split by pipe and remove empty first and last elements
-            cells = [cell.strip() for cell in header_line.split('|')]
-            if cells and not cells[0]:
-                cells.pop(0)
-            if cells and not cells[-1]:
-                cells.pop()
-            html_output += '<thead><tr>'
-            for cell in cells:
-                cell_html = ContextProcessor.process_cell_content(cell)
-                html_output += f'<th>{cell_html}</th>'
-            html_output += '</tr></thead>'
-            # Process data rows (skip header and separator)
-            html_output += '<tbody>'
-            for line in table_lines[2:]:
-                cells = [cell.strip() for cell in line.split('|')]
-                if cells and not cells[0]:
-                    cells.pop(0)
-                if cells and not cells[-1]:
-                    cells.pop()
-                html_output += '<tr>'
-                for cell in cells:
-                    cell_html = ContextProcessor.process_cell_content(cell)
-                    html_output += f'<td>{cell_html}</td>'
-                html_output += '</tr>'
-            html_output += '</tbody>'
-        else:
-            # All rows are data
-            html_output += '<tbody>'
-            for line in table_lines:
-                cells = [cell.strip() for cell in line.split('|')]
-                if cells and not cells[0]:
-                    cells.pop(0)
-                if cells and not cells[-1]:
-                    cells.pop()
-                html_output += '<tr>'
-                for cell in cells:
-                    cell_html = ContextProcessor.process_cell_content(cell)
-                    html_output += f'<td>{cell_html}</td>'
-                html_output += '</tr>'
-            html_output += '</tbody>'
-        html_output += '</table>'
-        return html_output
-    @staticmethod
-    def process_text(text: str) -> str:
-        """Processes text with highlights, handling all edge cases"""
-        # Clean and prepare the text
-        text = ContextProcessor.clean_text(text)
-        text = ContextProcessor.balance_quotes(text)
-        text = ContextProcessor.balance_highlight_tags(text)
-        # Extract and process highlight parts
-        parts = ContextProcessor.extract_highlight_parts(text)
-        # Build the result
-        result = ""
-        for is_highlighted, part in parts:
-            if is_highlighted:
-                escaped_part = html.escape(part)
-                result += f'<span class="highlight">{escaped_part}</span>'
-            else:
-                result += html.escape(part)
-        return result
-    @staticmethod
-    def process_content(content: str, abbreviated_content: Optional[str] = None) -> str:
-        """Main function to process any kind of content"""
-        # Handle null/empty content
-        if not content or not isinstance(content, str):
-            return ""
-        # Special cases that need abbreviated content
-        special_cases = [
-            lambda c: c.strip() == "In Oklahoma,",
-            lambda c: c.strip().startswith('"') and c.count('"') == 1,
-            lambda c: c.rstrip().endswith('\\'),
-            lambda c: (c.replace('\\"', '').count('"') % 2) == 1,
-            lambda c: any((c.count(start) != c.count(end)) for start, end in [
-                ('[[start_highlight]]', '[[end_highlight]]'),
-                ('[[highlight_start]]', '[[highlight_end]]'),
-                ('<span class="highlight">', '</span>')
-            ])
-        ]
-        # Check if we need to use abbreviated content
-        needs_abbreviated = any(check(content) for check in special_cases)
-        # If content needs help and we have abbreviated content, use it
-        if needs_abbreviated and abbreviated_content:
-            # Handle abbreviated content that might be a JSON string
-            if abbreviated_content.strip().startswith('{') and abbreviated_content.strip().endswith('}'):
-                try:
-                    data = json.loads(abbreviated_content)
-                    if "abbreviatedContent" in data:
-                        abbreviated_content = data["abbreviatedContent"]
-                except json.JSONDecodeError:
-                    pass
-            # Clean and prepare the abbreviated content
-            abbreviated_content = ContextProcessor.clean_text(abbreviated_content)
-            abbreviated_content = ContextProcessor.balance_quotes(abbreviated_content)
-            abbreviated_content = ContextProcessor.balance_highlight_tags(abbreviated_content)
-            # Use abbreviated content instead
-            content = abbreviated_content
-        # Check if content is a markdown table
-        if ContextProcessor.is_markdown_table(content):
-            return ContextProcessor.convert_table_to_html(content)
-        else:
-            return ContextProcessor.process_text(content)
-    @staticmethod
-    def parse_json_contexts(context_json: str) -> List[Dict[str, Any]]:
-        """Parses JSON-formatted context data with fallback to regex extraction"""
-        contexts = []
-        # First try standard JSON parsing
-        try:
-            contexts = json.loads(context_json)
-            if not isinstance(contexts, list):
-                contexts = []
-        except json.JSONDecodeError:
-            # If standard parsing fails, use regex to extract the data
-            try:
-                # Extract type field
-                type_pattern = r'"type":\s*"(primary|secondary)"'
-                types = re.findall(type_pattern, context_json)
-                # Extract abbreviatedContent field - more robustly handle quotes
-                content_pattern = r'"abbreviatedContent":\s*"((?:\\.|[^"])*?)"'
-                contents = re.findall(content_pattern, context_json)
-                # Build context objects
-                for i, (ctx_type, content) in enumerate(zip(types, contents)):
-                    contexts.append({
-                        'type': ctx_type,
-                        'abbreviatedContent': content.replace('\\"', '"')
-                    })
-            except Exception as e:
-                print(f"Error extracting contexts with regex: {e}")
-        return contexts
-    @staticmethod
-    def process_json_contexts(context_json: str) -> List[Dict[str, Any]]:
-        """Process JSON-formatted highlighted contexts"""
-        processed_contexts = []
-        try:
-            # Parse the JSON contexts
-            contexts = ContextProcessor.parse_json_contexts(context_json)
-            # Process each context item
-            for i, item in enumerate(contexts):
-                if isinstance(item, dict):
-                    context_type = item.get('type', 'secondary')
-                    content = item.get('abbreviatedContent', '')
-                    # Process the content
-                    processed_content = ContextProcessor.process_content(content)
-                    # Create processed context item
-                    processed_contexts.append({
-                        'chunk_num': i + 1,
-                        'content': processed_content,
-                        'is_primary': context_type == 'primary'
-                    })
-        except Exception as e:
-            print(f"Error processing JSON contexts: {e}")
-        return processed_contexts
-# Module-level functions for backward compatibility
-def clean_text(text):
-    return ContextProcessor.clean_text(text)
-def balance_highlight_tags(text):
-    return ContextProcessor.balance_highlight_tags(text)
-def balance_quotes(text):
-    return ContextProcessor.balance_quotes(text)
-def extract_highlight_parts(text):
-    return ContextProcessor.extract_highlight_parts(text)
-def is_markdown_table(text):
-    return ContextProcessor.is_markdown_table(text)
-def process_cell_content(cell_text):
-    return ContextProcessor.process_cell_content(cell_text)
-def convert_table_to_html(text):
-    return ContextProcessor.convert_table_to_html(text)
-def process_text(text):
-    return ContextProcessor.process_text(text)
-def process_content(content, abbreviated_content=None):
-    return ContextProcessor.process_content(content, abbreviated_content)
-def process_highlights(text):
-    """Main entry point called from data_loader.py"""
-    return ContextProcessor.process_content(text)
 def get_context_html(example, show_full=False):
-    """Format context chunks into HTML for display"""
     html_output = ""
     # Process insufficient context warning if needed
     if example.get("insufficient", False):
         insufficient_reason = example.get("insufficient_reason", "")
-        reason_html = (
-            f"<p>{insufficient_reason}</p>" if insufficient_reason else
-            "<p>The context may not contain enough information to fully answer the question, "
-            "or the question might be ambiguous. Models should ideally indicate this limitation "
-            "or refuse to answer.</p>"
-        )
         html_output += f"""
         <div class="insufficient-alert">
             <strong>
-                <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none"
-                     stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"
-                     style="vertical-align: middle; margin-right: 5px;">
                     <path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3Z"></path>
                     <line x1="12" y1="9" x2="12" y2="13"></line>
                     <line x1="12" y1="17" x2="12.01" y2="17"></line>
@@ -467,40 +58,39 @@ def get_context_html(example, show_full=False):
     html_output += '<div class="context-items-container">'
-    # Display full contexts if requested
-    if show_full and "full_contexts" in example and example["full_contexts"]:
-        for context_item in example["full_contexts"]:
-            content = context_item.get('content', '')
-            abbreviated = context_item.get('abbreviatedContent', None)
-            # Process the content
-            processed = ContextProcessor.process_content(content, abbreviated)
-            html_output += f'<div class="context-item">{processed}</div>'
     else:
-        # Display regular contexts if available
         if "contexts" in example and example["contexts"]:
             for context_item in example["contexts"]:
-                content = context_item.get('content', '')
-                abbreviated = context_item.get('abbreviatedContent', None)
-                # Process the content
-                processed = ContextProcessor.process_content(content, abbreviated)
-                is_primary = context_item.get('is_primary', False)
-                extra_class = " primary-context" if is_primary else ""
-                html_output += f'<div class="context-item{extra_class}">{processed}</div>'
-        # Or process JSON-structured highlighted contexts
-        elif "contexts_highlighted" in example and example["contexts_highlighted"]:
-            processed_contexts = ContextProcessor.process_json_contexts(example["contexts_highlighted"])
-            for context_item in processed_contexts:
-                is_primary = context_item.get('is_primary', False)
-                extra_class = " primary-context" if is_primary else ""
-                html_output += f'<div class="context-item{extra_class}">{context_item["content"]}</div>'
         else:
             html_output += '<div class="context-item">No context available. Try toggling to full context view.</div>'

 import re
 import html
 import json
def clean_text(text):
    """Clean text with common issues like HTML entities and escaped quotes.

    Processing steps, in order:

    1. Terminate a small set of HTML entities that lost their trailing ``;``.
    2. Decode HTML entities into characters.
    3. Decode the backslash escapes ``\\"``, ``\\'``, ``\\n``, ``\\t`` and
       ``\\\\`` in a single left-to-right pass.
    4. Normalize typographic quotes, backticks and acute accents to plain
       ASCII quotes.
    5. Drop trailing backslashes (and the whitespace that follows them).

    Args:
        text: The value to clean.

    Returns:
        The cleaned string. Falsy values and non-string values are returned
        unchanged.
    """
    if not text or not isinstance(text, str):
        return text

    # Fix incomplete HTML entities (missing the terminating semicolon).
    text = re.sub(r"(&#x27|&quot|&lt|&gt|&amp)(?!;)", r"\1;", text)

    # Convert HTML entities to characters.
    text = html.unescape(text)

    # Decode backslash escapes in ONE pass. Chained str.replace calls would
    # mis-read an escaped backslash followed by "n" (\\n) as a newline escape.
    escapes = {'"': '"', "'": "'", "n": "\n", "t": "\t", "\\": "\\"}
    text = re.sub(
        r"\\([\"'nt\\])",
        lambda match: escapes[match.group(1)],
        text,
    )

    # Normalize fancy quotes. Explicit code points are used so the mapping
    # survives any re-encoding of this source file.
    quote_table = str.maketrans({
        "\u201c": '"',  # left double quotation mark
        "\u201d": '"',  # right double quotation mark
        "\u2018": "'",  # left single quotation mark
        "\u2019": "'",  # right single quotation mark
        "`": "'",       # backtick
        "\u00b4": "'",  # acute accent
    })
    text = text.translate(quote_table)

    # Remove trailing backslash if present.
    stripped = text.rstrip()
    if stripped.endswith("\\"):
        text = stripped.rstrip("\\")
    return text
 def get_context_html(example, show_full=False):
+    """Format context chunks into HTML for display."""
     html_output = ""
     # Process insufficient context warning if needed
     if example.get("insufficient", False):
         insufficient_reason = example.get("insufficient_reason", "")
+        reason_html = f"<p>{insufficient_reason}</p>" if insufficient_reason else "<p>The context may not contain enough information to fully answer the question, or the question might be ambiguous. Models should ideally indicate this limitation or refuse to answer.</p>"
         html_output += f"""
         <div class="insufficient-alert">
             <strong>
+                <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align: middle; margin-right: 5px;">
                     <path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3Z"></path>
                     <line x1="12" y1="9" x2="12" y2="13"></line>
                     <line x1="12" y1="17" x2="12.01" y2="17"></line>
     html_output += '<div class="context-items-container">'
+    # Display full contexts or highlighted contexts based on toggle
+    if show_full:
+        # Show full context - directly use the strings from the list in full_contexts
+        if "full_contexts" in example and example["full_contexts"]:
+            for context_item in example["full_contexts"]:
+                if isinstance(context_item, dict) and 'content' in context_item:
+                    content = context_item.get('content', '')
+                elif isinstance(context_item, str):
+                    content = context_item
+                else:
+                    content = str(context_item)
+                # Escape HTML entities for safe display
+                escaped_content = html.escape(content)
+                # Create the context item box - no headers
+                html_output += f'<div class="context-item">{escaped_content}</div>'
     else:
+        # Show highlighted contexts
         if "contexts" in example and example["contexts"]:
             for context_item in example["contexts"]:
+                if isinstance(context_item, dict):
+                    content = context_item.get('content', '')
+                    is_primary = context_item.get('is_primary', False)
+                    # Extra class for primary context styling
+                    extra_class = " primary-context" if is_primary else ""
+                    # Use content directly as it already has HTML highlighting
+                    html_output += f'<div class="context-item{extra_class}">{content}</div>'
+                elif isinstance(context_item, str):
+                    # For direct string contexts
+                    html_output += f'<div class="context-item">{context_item}</div>'
         else:
             html_output += '<div class="context-item">No context available. Try toggling to full context view.</div>'

utils/data_loader.py CHANGED Viewed

@@ -3,7 +3,6 @@ import json
 import pandas as pd
 import random
 import re
-from .context_processor import process_highlights
 # Global data store - loaded once at import time
 _ARENA_DATA = None
@@ -40,10 +39,11 @@ def create_dummy_example():
     return {
         "question": "Could not load questions from the dataset. Please check the data file.",
         "processed_context_desc": "Error: Data not available",
-        "contexts": ["No context available"],
-        "full_context": "Error loading context data.",
         "Answerable": False,
-        "insufficient": True
     }
 def get_random_example():
@@ -64,102 +64,113 @@ def get_random_example():
     # Process the example data
     processed_example = {
         "question": example['question'],
-        "processed_context_desc": example.get('processed_context_desc', ''),
-        "Answerable": example.get('Answerable', True),  # Default to True unless specified otherwise
         "insufficient": example.get('insufficient', False),
-        "insufficient_reason": example.get('insufficient_reason', '')
     }
-    # Process contexts - for full context
     try:
-        contexts_raw = example['contexts']
-        if isinstance(contexts_raw, str):
-            contexts = json.loads(contexts_raw)
-            # Store full contexts as individual items
-            full_contexts = []
-            if isinstance(contexts, list):
-                for i, chunk in enumerate(contexts):
-                    if isinstance(chunk, dict) and 'content' in chunk:
-                        full_contexts.append({
-                            'chunk_num': i + 1,
-                            'content': chunk.get('content', '')
-                        })
-            processed_example["full_contexts"] = full_contexts
-        else:
-            processed_example["full_contexts"] = []
     except Exception as e:
-        print(f"Error processing contexts: {e}")
-        processed_example["full_contexts"] = []
-    # Process highlighted contexts for display
     contexts_highlighted = []
     try:
-        # Check if contexts_highlighted exists
         if 'contexts_highlighted' in example and example['contexts_highlighted']:
-            highlighted_contexts = []
-            if isinstance(example['contexts_highlighted'], str):
                 try:
-                    # Try direct JSON parsing first
-                    raw_str = example['contexts_highlighted']
-                    # First, manually parse the highlighted contexts using regex
-                    # This is a more robust approach for our specific format
-                    type_pattern = r'"type":\s*"(primary|secondary)"'
-                    content_pattern = r'"abbreviatedContent":\s*"([^"]*)"|"abbreviatedContent":\s*"([^"]*)'
-                    types = re.findall(type_pattern, raw_str)
-                    # Handle both regular quotes and escaped quotes in content
-                    raw_contents = re.findall(content_pattern, raw_str)
-                    # Extract contents from tuple matches (the regex has capture groups)
-                    contents = []
-                    for match in raw_contents:
-                        # Get the non-empty string from the tuple
-                        content = next((s for s in match if s), "")
-                        contents.append(content)
-                    # Create the highlighted contexts from extracted data
-                    for i, (ctx_type, content) in enumerate(zip(types, contents)):
-                        highlighted_contexts.append({
-                            'type': ctx_type,
-                            'abbreviatedContent': content
-                        })
-                except Exception as e:
-                    print(f"Error extracting contexts with regex: {e}")
-            else:
-                # Already an object, not a string
-                highlighted_contexts = example['contexts_highlighted']
-            # Process each context item
-            for i, item in enumerate(highlighted_contexts):
-                if isinstance(item, dict):
-                    ctx_type = item.get('type', 'secondary')
-                    content = item.get('abbreviatedContent', '')
-                    # Process highlights using the standard format
-                    content = process_highlights(content)
-                    contexts_highlighted.append({
-                        'chunk_num': i + 1,
-                        'content': content,
-                        'is_primary': ctx_type == 'primary'
-                    })
     except Exception as e:
         print(f"Error processing highlighted contexts: {e}")
-    # If we couldn't process the highlighted contexts, fall back to the full contexts
-    if not contexts_highlighted and processed_example["full_contexts"]:
-        for i, ctx in enumerate(processed_example["full_contexts"]):
             contexts_highlighted.append({
-                'chunk_num': i + 1,
-                'content': ctx.get('content', ''),
-                'is_primary': False
             })
     processed_example["contexts"] = contexts_highlighted
     return processed_example

 import pandas as pd
 import random
 import re
 # Global data store - loaded once at import time
 _ARENA_DATA = None
     return {
         "question": "Could not load questions from the dataset. Please check the data file.",
         "processed_context_desc": "Error: Data not available",
+        "contexts": [],
+        "full_contexts": [],
         "Answerable": False,
+        "insufficient": True,
+        "insufficient_reason": "Data loading error"
     }
 def get_random_example():
     # Process the example data
     processed_example = {
         "question": example['question'],
+        "Answerable": not example.get('insufficient', False),
         "insufficient": example.get('insufficient', False),
+        "insufficient_reason": example.get('insufficient_reason', ''),
+        "sample_id": example.get('sample_id', 0)
     }
+    # Process the context description - ensure it's a non-empty string
+    context_desc = example.get('processed_context_desc', '')
+    if pd.isna(context_desc):
+        context_desc = ""
+    # Add the description to the processed example
+    processed_example["processed_context_desc"] = context_desc
+    # Process full contexts - from the 'contexts' column
+    full_contexts = []
     try:
+        if 'contexts' in example and example['contexts']:
+            # Try to parse contexts as JSON if it's a string
+            contexts_str = example['contexts']
+            if isinstance(contexts_str, str):
+                # Try to parse as list literal first (for Python list representation)
+                if contexts_str.strip().startswith('[') and contexts_str.strip().endswith(']'):
+                    try:
+                        # This is for handling Python list literals like "['string1', 'string2']"
+                        import ast
+                        contexts_list = ast.literal_eval(contexts_str)
+                        # Process each context string in the list
+                        for ctx in contexts_list:
+                            full_contexts.append(ctx)
+                    except (SyntaxError, ValueError) as e:
+                        # If ast.literal_eval fails, try JSON
+                        try:
+                            contexts_list = json.loads(contexts_str)
+                            # Process each context in the list
+                            for ctx in contexts_list:
+                                if isinstance(ctx, str):
+                                    full_contexts.append(ctx)
+                                elif isinstance(ctx, dict) and 'content' in ctx:
+                                    full_contexts.append(ctx.get('content', ''))
+                        except json.JSONDecodeError:
+                            # Not valid JSON, treat as a single context
+                            full_contexts.append(contexts_str)
+                else:
+                    # Single context string (not JSON array or list literal)
+                    full_contexts.append(contexts_str)
+            elif isinstance(contexts_str, list):
+                # Already a list, process directly
+                for ctx in contexts_str:
+                    if isinstance(ctx, str):
+                        full_contexts.append(ctx)
+                    elif isinstance(ctx, dict) and 'content' in ctx:
+                        full_contexts.append(ctx.get('content', ''))
     except Exception as e:
+        print(f"Error processing full contexts: {e}")
+    # Process highlighted contexts - from contexts_highlighted column
     contexts_highlighted = []
     try:
+        # Process contexts_highlighted - this is stored as a string in CSV
         if 'contexts_highlighted' in example and example['contexts_highlighted']:
+            highlights_str = example['contexts_highlighted']
+            if isinstance(highlights_str, str):
                 try:
+                    # Try to parse as JSON array
+                    highlights_list = json.loads(highlights_str)
+                    # Process each highlighted context
+                    for i, ctx in enumerate(highlights_list):
+                        if isinstance(ctx, dict):
+                            ctx_type = ctx.get('type', 'secondary')
+                            content = ctx.get('abbreviatedContent', '')
+                            # The content already has HTML span tags for highlights
+                            contexts_highlighted.append({
+                                'is_primary': ctx_type == 'primary',
+                                'content': content
+                            })
+                except json.JSONDecodeError:
+                    print(f"Error parsing contexts_highlighted JSON: {highlights_str[:100]}...")
+            elif isinstance(highlights_str, list):
+                # Already a list, process directly
+                for ctx in highlights_str:
+                    if isinstance(ctx, dict):
+                        ctx_type = ctx.get('type', 'secondary')
+                        content = ctx.get('abbreviatedContent', '')
+                        contexts_highlighted.append({
+                            'is_primary': ctx_type == 'primary',
+                            'content': content
+                        })
     except Exception as e:
         print(f"Error processing highlighted contexts: {e}")
+    # Make sure we have the highlighted contexts populated even if there are no contexts_highlighted
+    if not contexts_highlighted and full_contexts:
+        for content in full_contexts:
             contexts_highlighted.append({
+                'is_primary': False,
+                'content': content
             })
     processed_example["contexts"] = contexts_highlighted
+    processed_example["full_contexts"] = full_contexts
     return processed_example