Spaces:
Running
on
Zero
Running
on
Zero
Removed JS (#20)
Browse files- Removed JS (6d5a5902a790fb59f1f57ec6ee7e668ad6fe2995)
Co-authored-by: Kai <[email protected]>
app.py
CHANGED
|
@@ -3,7 +3,8 @@ import random
|
|
| 3 |
import pandas as pd
|
| 4 |
import os
|
| 5 |
import threading
|
| 6 |
-
import time
|
|
|
|
| 7 |
from utils.data_loader import get_random_example
|
| 8 |
from utils.models import generate_summaries, model_names
|
| 9 |
from utils.ui_helpers import toggle_context_display, update_feedback, get_context_html
|
|
@@ -21,8 +22,19 @@ feedback_options = {
|
|
| 21 |
"Model B: Incomplete", "Model B: Hallucinate", "Model B: Irrelevant", "Model B: Incorrect refusal (if applicable)"]
|
| 22 |
}
|
| 23 |
|
| 24 |
-
def load_context():
|
| 25 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
generation_interrupt.clear()
|
| 27 |
example = get_random_example()
|
| 28 |
|
|
@@ -85,13 +97,14 @@ def process_generation_result(result):
|
|
| 85 |
"", "", "", "", None, [], False, load_leaderboard_data(),
|
| 86 |
gr.update(value="Generation was interrupted or failed. Please try again."),
|
| 87 |
gr.update(value="Generation was interrupted or failed. Please try again."),
|
| 88 |
-
|
| 89 |
-
gr.update(interactive=
|
| 90 |
-
gr.update(interactive=
|
| 91 |
-
gr.update(interactive=
|
|
|
|
| 92 |
gr.update(choices=[], value=[], interactive=False, visible=False),
|
| 93 |
gr.update(visible=False),
|
| 94 |
-
gr.update(interactive=
|
| 95 |
gr.update(visible=False),
|
| 96 |
gr.update(interactive=True),
|
| 97 |
gr.update(elem_classes=[])
|
|
@@ -105,6 +118,7 @@ def process_generation_result(result):
|
|
| 105 |
None, [], False, agg_results,
|
| 106 |
gr.update(value=result["summary_a"]),
|
| 107 |
gr.update(value=result["summary_b"]),
|
|
|
|
| 108 |
gr.update(interactive=True, elem_classes=["vote-button"]),
|
| 109 |
gr.update(interactive=True, elem_classes=["vote-button"]),
|
| 110 |
gr.update(interactive=True, elem_classes=["vote-button"]),
|
|
@@ -167,6 +181,7 @@ def show_loading_state():
|
|
| 167 |
return [
|
| 168 |
gr.update(value="Loading new question and summaries...", interactive=False),
|
| 169 |
gr.update(value="Loading new question and summaries...", interactive=False),
|
|
|
|
| 170 |
gr.update(interactive=False),
|
| 171 |
gr.update(interactive=False),
|
| 172 |
gr.update(interactive=False),
|
|
@@ -175,9 +190,8 @@ def show_loading_state():
|
|
| 175 |
|
| 176 |
def handle_new_example_click():
|
| 177 |
"""Handle clicking 'Get new example' button"""
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
return load_context()[0]
|
| 181 |
|
| 182 |
def update_ui_for_new_context(example):
|
| 183 |
"""Update UI with new context information"""
|
|
@@ -196,22 +210,10 @@ with gr.Blocks(theme=gr.themes.Default(
|
|
| 196 |
)) as demo:
|
| 197 |
# Load CSS
|
| 198 |
css_path = os.path.join(os.getcwd(), 'static', 'styles.css')
|
| 199 |
-
|
| 200 |
-
# Make sure the JavaScript directory exists
|
| 201 |
-
os.makedirs(os.path.join(os.getcwd(), 'static', 'js'), exist_ok=True)
|
| 202 |
-
|
| 203 |
-
# Load the files
|
| 204 |
with open(css_path, 'r') as f:
|
| 205 |
css_content = f.read()
|
| 206 |
|
| 207 |
-
# Create HTML components with CSS and JavaScript links
|
| 208 |
gr.HTML(f"<style>{css_content}</style>")
|
| 209 |
-
|
| 210 |
-
# Load JavaScript file via script tag
|
| 211 |
-
js_path = os.path.join(os.getcwd(), 'static', 'js', 'scroll_helpers.js')
|
| 212 |
-
# Use relative path for the script source
|
| 213 |
-
js_path_relative = 'static/js/scroll_helpers.js'
|
| 214 |
-
gr.HTML(f'<script src="{js_path_relative}"></script>')
|
| 215 |
|
| 216 |
# State Variables
|
| 217 |
current_example = gr.State({})
|
|
@@ -290,10 +292,11 @@ with gr.Blocks(theme=gr.themes.Default(
|
|
| 290 |
# Voting section
|
| 291 |
gr.Markdown("### 🏅 Cast Your Vote", elem_classes="section-heading")
|
| 292 |
with gr.Row():
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
|
|
|
| 297 |
|
| 298 |
# Feedback and Submit sections
|
| 299 |
with gr.Group(elem_classes=["feedback-section"], visible=False) as feedback_section:
|
|
@@ -337,7 +340,6 @@ The Elo rating system provides a more accurate ranking than simple win rates:
|
|
| 337 |
|
| 338 |
results_table_display = gr.HTML(label="Model Performance")
|
| 339 |
|
| 340 |
-
# Event handling
|
| 341 |
# Toggle context display
|
| 342 |
context_toggle_btn.click(
|
| 343 |
fn=toggle_context_display,
|
|
@@ -346,8 +348,9 @@ The Elo rating system provides a more accurate ranking than simple win rates:
|
|
| 346 |
)
|
| 347 |
|
| 348 |
# Initial loading - context first, then summaries
|
|
|
|
| 349 |
demo.load(
|
| 350 |
-
fn=load_context,
|
| 351 |
inputs=[],
|
| 352 |
outputs=[current_example, query_display, context_description, context_display,
|
| 353 |
context_toggle_btn, show_full_context]
|
|
@@ -376,7 +379,7 @@ The Elo rating system provides a more accurate ranking than simple win rates:
|
|
| 376 |
outputs=[summary_a_display, summary_b_display, vote_button_a,
|
| 377 |
vote_button_b, vote_button_tie, vote_button_neither]
|
| 378 |
).then(
|
| 379 |
-
fn=handle_new_example_click,
|
| 380 |
inputs=[],
|
| 381 |
outputs=[current_example]
|
| 382 |
).then(
|
|
|
|
| 3 |
import pandas as pd
|
| 4 |
import os
|
| 5 |
import threading
|
| 6 |
+
import time
|
| 7 |
+
from pathlib import Path
|
| 8 |
from utils.data_loader import get_random_example
|
| 9 |
from utils.models import generate_summaries, model_names
|
| 10 |
from utils.ui_helpers import toggle_context_display, update_feedback, get_context_html
|
|
|
|
| 22 |
"Model B: Incomplete", "Model B: Hallucinate", "Model B: Irrelevant", "Model B: Incorrect refusal (if applicable)"]
|
| 23 |
}
|
| 24 |
|
| 25 |
+
def load_context(set_interrupt=False):
|
| 26 |
+
"""
|
| 27 |
+
Load a new question and context
|
| 28 |
+
|
| 29 |
+
Parameters:
|
| 30 |
+
- set_interrupt: If True, will interrupt any ongoing inference before loading
|
| 31 |
+
"""
|
| 32 |
+
if set_interrupt:
|
| 33 |
+
# Interrupt any ongoing inference
|
| 34 |
+
generation_interrupt.set()
|
| 35 |
+
time.sleep(0.2) # Short delay to allow threads to detect interrupt
|
| 36 |
+
|
| 37 |
+
# Always clear the flag before starting new work
|
| 38 |
generation_interrupt.clear()
|
| 39 |
example = get_random_example()
|
| 40 |
|
|
|
|
| 97 |
"", "", "", "", None, [], False, load_leaderboard_data(),
|
| 98 |
gr.update(value="Generation was interrupted or failed. Please try again."),
|
| 99 |
gr.update(value="Generation was interrupted or failed. Please try again."),
|
| 100 |
+
# Keep voting buttons disabled when generation fails or is interrupted
|
| 101 |
+
gr.update(interactive=False, elem_classes=["vote-button"]),
|
| 102 |
+
gr.update(interactive=False, elem_classes=["vote-button"]),
|
| 103 |
+
gr.update(interactive=False, elem_classes=["vote-button"]),
|
| 104 |
+
gr.update(interactive=False, elem_classes=["vote-button", "vote-button-neither"]),
|
| 105 |
gr.update(choices=[], value=[], interactive=False, visible=False),
|
| 106 |
gr.update(visible=False),
|
| 107 |
+
gr.update(interactive=True, visible=True),
|
| 108 |
gr.update(visible=False),
|
| 109 |
gr.update(interactive=True),
|
| 110 |
gr.update(elem_classes=[])
|
|
|
|
| 118 |
None, [], False, agg_results,
|
| 119 |
gr.update(value=result["summary_a"]),
|
| 120 |
gr.update(value=result["summary_b"]),
|
| 121 |
+
# Enable voting buttons only when both summaries are ready
|
| 122 |
gr.update(interactive=True, elem_classes=["vote-button"]),
|
| 123 |
gr.update(interactive=True, elem_classes=["vote-button"]),
|
| 124 |
gr.update(interactive=True, elem_classes=["vote-button"]),
|
|
|
|
| 181 |
return [
|
| 182 |
gr.update(value="Loading new question and summaries...", interactive=False),
|
| 183 |
gr.update(value="Loading new question and summaries...", interactive=False),
|
| 184 |
+
# Disable voting buttons during loading
|
| 185 |
gr.update(interactive=False),
|
| 186 |
gr.update(interactive=False),
|
| 187 |
gr.update(interactive=False),
|
|
|
|
| 190 |
|
| 191 |
def handle_new_example_click():
    """Respond to a click on the 'Get new example' button.

    Delegates to ``load_context`` with ``set_interrupt=True`` so that the
    interruption of any ongoing inference is handled in that one place, then
    returns only the first element of the sequence ``load_context`` produces
    (presumably the newly loaded example -- confirm against ``load_context``).
    """
    # Centralized path: load_context performs the interrupt before loading.
    context_outputs = load_context(set_interrupt=True)
    return context_outputs[0]
|
|
|
|
| 195 |
|
| 196 |
def update_ui_for_new_context(example):
|
| 197 |
"""Update UI with new context information"""
|
|
|
|
| 210 |
)) as demo:
|
| 211 |
# Load CSS
|
| 212 |
css_path = os.path.join(os.getcwd(), 'static', 'styles.css')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
with open(css_path, 'r') as f:
|
| 214 |
css_content = f.read()
|
| 215 |
|
|
|
|
| 216 |
gr.HTML(f"<style>{css_content}</style>")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
# State Variables
|
| 219 |
current_example = gr.State({})
|
|
|
|
| 292 |
# Voting section
|
| 293 |
gr.Markdown("### 🏅 Cast Your Vote", elem_classes="section-heading")
|
| 294 |
with gr.Row():
|
| 295 |
+
# Start with voting buttons disabled
|
| 296 |
+
vote_button_a = gr.Button("⬅️ Summary A is Better", elem_classes=["vote-button"], interactive=False)
|
| 297 |
+
vote_button_tie = gr.Button("🤝 Tie / Equally Good", elem_classes=["vote-button"], interactive=False)
|
| 298 |
+
vote_button_b = gr.Button("➡️ Summary B is Better", elem_classes=["vote-button"], interactive=False)
|
| 299 |
+
vote_button_neither = gr.Button("❌ Neither is Good", elem_classes=["vote-button", "vote-button-neither"], interactive=False)
|
| 300 |
|
| 301 |
# Feedback and Submit sections
|
| 302 |
with gr.Group(elem_classes=["feedback-section"], visible=False) as feedback_section:
|
|
|
|
| 340 |
|
| 341 |
results_table_display = gr.HTML(label="Model Performance")
|
| 342 |
|
|
|
|
| 343 |
# Toggle context display
|
| 344 |
context_toggle_btn.click(
|
| 345 |
fn=toggle_context_display,
|
|
|
|
| 348 |
)
|
| 349 |
|
| 350 |
# Initial loading - context first, then summaries
|
| 351 |
+
# Uses load_context without interruption since it's the first load
|
| 352 |
demo.load(
|
| 353 |
+
fn=load_context, # Default is set_interrupt=False
|
| 354 |
inputs=[],
|
| 355 |
outputs=[current_example, query_display, context_description, context_display,
|
| 356 |
context_toggle_btn, show_full_context]
|
|
|
|
| 379 |
outputs=[summary_a_display, summary_b_display, vote_button_a,
|
| 380 |
vote_button_b, vote_button_tie, vote_button_neither]
|
| 381 |
).then(
|
| 382 |
+
fn=handle_new_example_click, # Now uses the centralized approach
|
| 383 |
inputs=[],
|
| 384 |
outputs=[current_example]
|
| 385 |
).then(
|