Spaces:
Sleeping
Sleeping
File size: 24,972 Bytes
93126d2 07bb213 93126d2 57f6cce b92da5d 671df1d b92da5d 671df1d b92da5d 671df1d b92da5d 57f6cce ab5d575 07bb213 e64fe73 07bb213 671df1d 07bb213 671df1d 07bb213 be15be7 ab5d575 07bb213 be15be7 671df1d 07bb213 be15be7 4cde644 93126d2 f17ba29 93126d2 349b4c0 0ae90b3 f17ba29 349b4c0 0ae90b3 f17ba29 0ae90b3 f17ba29 0ae90b3 f17ba29 0ae90b3 349b4c0 f17ba29 349b4c0 f17ba29 0ae90b3 349b4c0 f17ba29 0ae90b3 349b4c0 0ae90b3 f17ba29 349b4c0 f17ba29 faf9069 f17ba29 349b4c0 f17ba29 349b4c0 f17ba29 349b4c0 f17ba29 349b4c0 0ab0b5b f17ba29 ab5d575 f17ba29 ab5d575 f17ba29 a732253 f17ba29 93126d2 349b4c0 f17ba29 349b4c0 f17ba29 349b4c0 f17ba29 349b4c0 f17ba29 349b4c0 f17ba29 349b4c0 f17ba29 349b4c0 f17ba29 349b4c0 f17ba29 349b4c0 f17ba29 93126d2 07bb213 671df1d 93126d2 349b4c0 f17ba29 b92da5d ab5d575 be15be7 ab5d575 07bb213 be15be7 fb877dd ab5d575 fb877dd ab5d575 fb877dd ab5d575 fb877dd ab5d575 fb877dd ab5d575 be15be7 b92da5d be15be7 ab5d575 07bb213 be15be7 ab5d575 94835af ab5d575 07bb213 ab5d575 b92da5d be15be7 b92da5d 1fd1810 f17ba29 1fd1810 349b4c0 f17ba29 b92da5d 07a73c7 07bb213 b92da5d f17ba29 ab5d575 f17ba29 349b4c0 93126d2 f17ba29 93126d2 f17ba29 349b4c0 f17ba29 349b4c0 f17ba29 93126d2 ab5d575 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 |
import gradio as gr
import pandas as pd
import json
import os
from gradio.themes.utils import colors
# Load external CSS from the file "styles.css".
try:
    with open("styles.css", "r", encoding="utf-8") as f:
        custom_css = f.read()
except UnicodeDecodeError:
    # Retry with latin-1, which can decode any byte sequence.
    with open("styles.css", "r", encoding="latin-1") as f:
        custom_css = f.read()
except FileNotFoundError:
    # A missing stylesheet should only degrade the look of the app, not stop it
    # from starting; the inline rules below are still applied.
    print('Warning: "styles.css" not found; continuing without external styles.')
    custom_css = ""

# More specific selectors for Gradio, marked !important to win the cascade.
additional_css = """
.gradio-container .checkbox-panel,
div.gradio-container [class*="block"] .checkbox-panel {
background-color: #27272A !important;
}
.gradio-container .search-panel,
div.gradio-container [class*="block"] .search-panel {
background-color: #27272A !important;
}
"""
custom_css += additional_css
# Custom theme: red/gray palette, large text, blocks without border or shadow.
class CustomTheme(gr.themes.Base):
    """Gradio base theme tuned for the leaderboard.

    Uses a red primary hue, gray secondary and neutral hues and the large text
    size, then removes the border and shadow drawn around blocks.
    """

    def __init__(self):
        palette = {
            "primary_hue": colors.red,
            "secondary_hue": colors.gray,
            "neutral_hue": colors.gray,
        }
        super().__init__(text_size=gr.themes.sizes.text_lg, **palette)
        # No global background color is set here; only block chrome is changed.
        self.block_shadow = "none"
        self.block_border_width = "0px"
# Extra rules that target the checkbox and search panels by element ID.
_panel_override_css = """
/* Only override specific panels by ID */
#checkbox-panel,
#search-panel {
background-color: #F0F0F0 !important;
}
/* Only affect immediate children of these specific panels */
#checkbox-panel > *,
#search-panel > * {
background-color: transparent !important;
}
/* Target checkbox inputs specifically */
#checkbox-panel input[type="checkbox"],
#search-panel input[type="text"] {
background-color: transparent !important;
}
"""
custom_css = custom_css + _panel_override_css
def strip_timestamp(name):
    """Return the model name without its leading timestamp segment.

    Everything up to and including the first hyphen is dropped. A name that
    contains no hyphen is returned unchanged.
    """
    _prefix, hyphen, remainder = name.partition('-')
    return remainder if hyphen else name
# Static grouping for the 10 general submissions. Each row holds the run ids of
# one model, in the order given by _VARIANT_KEYS.
_VARIANT_KEYS = ("mwoz", "tau_airline", "tau_retail")
GROUPS = [
    dict(zip(_VARIANT_KEYS, run_ids))
    for run_ids in (
        ("20250214_193236-o1", "20250215_115156-tau-o1-airline", "20250215_121147-tau-o1-retail"),
        ("20250131_012338-llama405", "20250204_144222-tau-llama-405b-airline", "20250205_033820-tau-llama405b-retail"),
        ("20250130_140218-4o", "20250131_152503-tau-4o-airline", "20250131_152422-tau-4o-retail"),
        ("20250130_183030-claude", "20250205_030422-tau-sonnet-airline", "20250131_152807-tau-sonnet-retail"),
        ("20250131_012449-llama70", "20250208_024344-tau-llama70b-airline", "20250208_030407-tau-llama70b-retail"),
        ("20250131_013711-qwen72b", "20250202_112945-qwen72b-airline", "20250202_140527-qwen72b-retail"),
        ("20250130_184905-mistrallarge", "20250205_024823-tau-mistrallarge-airline", "20250205_044403-tau-mistrallarge-retail"),
        ("20250131_010143-o1mini", "20250214_180731-tau-o1-mini-airline", "20250214_142736-tau-o1-mini-retail"),
        ("20250130_140439-4omini", "20250131_152226-tau-4o-mini-airline", "20250131_152338-tau-4o-mini-retail"),
        ("20250130_145202-gpt35", "20250131_152708-tau-gpt35-airline", "20250131_152610-tau-gpt35-retail"),
    )
]
def load_mwoz_results():
    """Load the mwoz leaderboard records.

    Reads ``data/mwoz_leaderboard_results.json`` relative to the current
    working directory.

    Returns:
        The parsed JSON content, or an empty list when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    path = os.path.join("data", "mwoz_leaderboard_results.json")
    if not os.path.exists(path):
        return []
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
def load_tau_results():
    """Load the tau leaderboard records.

    Reads ``data/tau_leaderboard_results.json`` relative to the current
    working directory.

    Returns:
        The parsed JSON content, or an empty list when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    path = os.path.join("data", "tau_leaderboard_results.json")
    if not os.path.exists(path):
        return []
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
def create_grouped_leaderboard(selected_mwoz, selected_tau_airline, selected_tau_retail, sort_state, search_query=""):
    """Build the aggregated leaderboard as a DataFrame.

    For every entry of ``GROUPS`` the three metrics of each selected variant
    that has a record are averaged, then combined into an overall average.

    Args:
        selected_mwoz: Include the mwoz run of each group.
        selected_tau_airline: Include the tau-airline run of each group.
        selected_tau_retail: Include the tau-retail run of each group.
        sort_state: Dict (or JSON string of a dict) with ``sort_by`` and
            ``ascending``. Anything else falls back to sorting by
            "Average Score" in descending order.
        search_query: Case-insensitive substring that the combined model name
            must contain; an empty value disables the filter.

    Returns:
        A DataFrame with the columns Rank, Model, Average Score,
        Conversation Consistency, Backend Consistency, Policy Completeness and
        Judge Model, or an empty DataFrame when no row qualifies.
    """
    # At least one variant must be active; fall back to mwoz when none is.
    if not (selected_mwoz or selected_tau_airline or selected_tau_retail):
        selected_mwoz = True

    mwoz_lookup = {entry["model_name"]: entry for entry in load_mwoz_results()}
    tau_lookup = {entry["model_name"]: entry for entry in load_tau_results()}

    metric_keys = ("avg_conv_consistency", "avg_backend_consistency", "avg_policy_completeness")
    # (is selected, key inside a GROUPS entry, lookup holding that variant's records)
    variants = (
        (selected_mwoz, "mwoz", mwoz_lookup),
        (selected_tau_airline, "tau_airline", tau_lookup),
        (selected_tau_retail, "tau_retail", tau_lookup),
    )

    aggregated = []
    for group in GROUPS:
        totals = dict.fromkeys(metric_keys, 0)
        title_parts = []
        judge_model = ""
        for is_selected, group_key, lookup in variants:
            if not is_selected:
                continue
            run_id = group[group_key]
            record = lookup.get(run_id)
            if record is None:
                continue
            for metric in metric_keys:
                totals[metric] += record.get(metric, 0)
            title_parts.append(strip_timestamp(run_id))
            # The last contributing record decides the displayed judge model.
            judge_model = record.get("judge_model", "")

        count = len(title_parts)
        if count == 0:
            # No data for this group under the current selection. Emitting a
            # blank-named, all-zero row would distort ranks and color scales.
            continue

        avg_conv = totals["avg_conv_consistency"] / count
        avg_backend = totals["avg_backend_consistency"] / count
        avg_policy = totals["avg_policy_completeness"] / count
        overall_avg = (avg_conv + avg_backend + avg_policy) / 3

        model_name = " / ".join(title_parts)
        # Apply search filter
        if search_query and search_query.lower() not in model_name.lower():
            continue

        aggregated.append({
            "Model": model_name,
            "Average Score": round(overall_avg, 4),
            "Conversation Consistency": round(avg_conv, 4),
            "Backend Consistency": round(avg_backend, 4),
            "Policy Completeness": round(avg_policy, 4),
            "Judge Model": judge_model,
        })

    df = pd.DataFrame(aggregated)
    # If no results found after filtering
    if df.empty:
        return df

    # Rank always reflects the overall average, whatever column is sorted on.
    df["Rank"] = df["Average Score"].rank(ascending=False, method="min").astype(int)

    allowed_sort_cols = ["Average Score", "Conversation Consistency", "Backend Consistency", "Policy Completeness"]
    default_sort = {"sort_by": "Average Score", "ascending": False}
    if isinstance(sort_state, str):
        try:
            sort_state = json.loads(sort_state)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            sort_state = default_sort
    if not isinstance(sort_state, dict):
        sort_state = default_sort

    sort_by = sort_state.get("sort_by", "Average Score")
    ascending = sort_state.get("ascending", False)
    if sort_by in allowed_sort_cols:
        df = df.sort_values(sort_by, ascending=ascending)
    else:
        # Default sort if column not found
        df = df.sort_values("Average Score", ascending=False)

    # Show the rank as the first column.
    ordered_cols = ["Rank"] + [col for col in df.columns if col != "Rank"]
    return df[ordered_cols]
def update_sort_state(current_state, clicked_column):
    """Return the sort state that results from clicking a column.

    ``current_state`` may be a dict, a JSON string or anything else. Clicking
    the column that is already the ``sort_by`` of a valid state flips its
    direction. Every other case selects ``clicked_column`` in descending order.
    """
    state = current_state
    if isinstance(state, str):
        try:
            state = json.loads(state)
        except (json.JSONDecodeError, TypeError):
            # Unparseable input is treated like "no previous state".
            state = None

    same_column = (
        isinstance(state, dict)
        and "sort_by" in state
        and state["sort_by"] == clicked_column
    )
    if same_column:
        # Same column clicked again: toggle the direction.
        ascending = not state.get("ascending", False)
    else:
        # New column or unusable state: start in descending order.
        ascending = False
    return {"sort_by": clicked_column, "ascending": ascending}
def sort_by_avg(sort_state):
    """Return the sort state after a click on the "Average Score" column."""
    return update_sort_state(current_state=sort_state, clicked_column="Average Score")
def sort_by_conv(sort_state):
    """Return the sort state after a click on the "Conversation Consistency" column."""
    return update_sort_state(current_state=sort_state, clicked_column="Conversation Consistency")
def sort_by_backend(sort_state):
    """Return the sort state after a click on the "Backend Consistency" column."""
    return update_sort_state(current_state=sort_state, clicked_column="Backend Consistency")
def sort_by_policy(sort_state):
    """Return the sort state after a click on the "Policy Completeness" column."""
    return update_sort_state(current_state=sort_state, clicked_column="Policy Completeness")
def get_color_for_value(value, min_val, max_val):
    """Map a value to a ``#RRGGBB`` color between dark red and dark green.

    The value is normalized against ``[min_val, max_val]``; when both bounds
    are equal the midpoint of the scale is used. The lower half of the scale
    keeps red at 180 and scales green by the position; the upper half keeps
    green at 140 and scales red down. Blue is always 0.
    """
    position = 0.5 if max_val == min_val else (value - min_val) / (max_val - min_val)

    if position < 0.5:
        # Darker red for lower values
        ramp = position / 0.5
        red, green = 180, int(140 * ramp)
    else:
        # Darker green for higher values
        ramp = (position - 0.5) / 0.5
        red, green = int(140 * (1 - ramp)), 140
    blue = 0
    return f"#{red:02X}{green:02X}{blue:02X}"
def generate_html_table(df):
    """Render the leaderboard DataFrame as a borderless HTML table.

    Args:
        df: Leaderboard frame. When it is not empty it must contain the four
            score columns "Average Score", "Conversation Consistency",
            "Backend Consistency" and "Policy Completeness".

    Returns:
        The table markup, or a "no results" ``<div>`` for an empty frame.
        Score cells are colored relative to the minimum and maximum of their
        column. Headers and all other cells are HTML-escaped so that text
        coming from the data files cannot break or inject markup.
    """
    # Imported locally under its own name to keep the block self-contained.
    from html import escape

    if df.empty:
        return "<div class='no-results'>No matching results found.</div>"

    numeric_cols = ["Average Score", "Conversation Consistency", "Backend Consistency", "Policy Completeness"]
    col_min = {col: df[col].min() for col in numeric_cols}
    col_max = {col: df[col].max() for col in numeric_cols}

    # Collect fragments and join once instead of growing a string in a loop.
    parts = ["<table style='border: none; border-collapse: collapse;'>", "<tr>"]
    parts.extend(
        f"<th style='padding:8px; border: none;'>{escape(str(col))}</th>"
        for col in df.columns
    )
    parts.append("</tr>")

    for _, row in df.iterrows():
        parts.append("<tr style='border: none;'>")
        for col in df.columns:
            cell_value = row[col]
            if col in numeric_cols:
                color = get_color_for_value(cell_value, col_min[col], col_max[col])
                parts.append(
                    f"<td style='padding: 8px; border: none; color: {color}; font-weight: bold;'>{cell_value}</td>"
                )
            else:
                parts.append(
                    f"<td style='padding: 8px; border: none;'>{escape(str(cell_value))}</td>"
                )
        parts.append("</tr>")

    parts.append("</table>")
    return "".join(parts)
def update_leaderboard(selected_mwoz, selected_tau_airline, selected_tau_retail, sort_state, search_query=""):
    """Return the leaderboard HTML for the current selection, sort and search.

    Args:
        selected_mwoz: Include the mwoz variant.
        selected_tau_airline: Include the tau-airline variant.
        selected_tau_retail: Include the tau-retail variant.
        sort_state: Dict (or JSON string of a dict) with ``sort_by`` and
            ``ascending``; anything else is replaced by the default sort.
        search_query: Substring filter forwarded to the table builder.

    Returns:
        A "Sorted by" banner followed by the table. If building the table
        fails, the error is printed and the table is rebuilt with the default
        sort and shown below an error banner.
    """
    default_sort = {"sort_by": "Average Score", "ascending": False}
    try:
        # Convert sort_state to dict if it's a string
        if isinstance(sort_state, str):
            try:
                sort_state = json.loads(sort_state)
            except ValueError:  # json.JSONDecodeError is a ValueError subclass
                sort_state = dict(default_sort)
        # Ensure sort_state is a dict
        if not isinstance(sort_state, dict):
            sort_state = dict(default_sort)

        # Generate the data and table
        df = create_grouped_leaderboard(selected_mwoz, selected_tau_airline, selected_tau_retail, sort_state, search_query)
        html_table = generate_html_table(df)

        # Get sort info with fallbacks
        sort_col = sort_state.get("sort_by", "Average Score")
        sort_dir = "▼" if not sort_state.get("ascending", False) else "▲"
        return f"""
<div class="sort-info">
<p>Sorted by: {sort_col} {sort_dir}</p>
</div>
{html_table}
"""
    except Exception as e:
        # Top-level UI boundary: report the problem and still show a table.
        print(f"Error in update_leaderboard: {str(e)}")
        # Only the sort is reset. The search filter is kept so the fallback
        # table still matches what the user typed; a non-string query is dropped.
        safe_query = search_query if isinstance(search_query, str) else ""
        df = create_grouped_leaderboard(selected_mwoz, selected_tau_airline, selected_tau_retail,
                                        dict(default_sort), safe_query)
        html_table = generate_html_table(df)
        return f"""
<div class="sort-info" style="color: #ff6b6b;">
<p>Error in sorting. Using default sort: Average Score (descending)</p>
</div>
{html_table}
"""
# Create our custom theme instance
custom_theme = CustomTheme()

# Build the whole UI. Components are created in display order: title, helper
# script, variant description, checkboxes, search box, sort buttons, table.
with gr.Blocks(css=custom_css, title="TD-EVAL Leaderboard", theme=custom_theme) as demo:
    gr.Markdown("# 🏆 TD-EVAL Model Evaluation Leaderboard")
    gr.HTML('<div class="subtitle">This leaderboard displays aggregated model performance across multiple evaluation metrics.</div>')
    # Add JavaScript to ensure backgrounds are properly set
    # NOTE(review): this relies on the browser executing a <script> that is
    # injected through gr.HTML. Confirm the deployed Gradio version runs it.
    gr.HTML("""
<script>
// Function to fix background colors
function fixBackgrounds() {
// Add a style tag to force all block-info spans to be black
var styleEl = document.createElement('style');
styleEl.textContent = `
span[data-testid="block-info"] { color: #000000 !important; }
.svelte-1gfkn6j { color: #000000 !important; }
.search-panel label,
.search-panel .label-wrap,
.search-panel span,
#search-panel span,
div[id="search-panel"] span { color: #000000 !important; }
`;
document.head.appendChild(styleEl);
// Only fix specific panels by ID
var checkboxPanel = document.getElementById('checkbox-panel');
if (checkboxPanel) {
checkboxPanel.style.backgroundColor = '#F0F0F0';
// Only make checkboxes and their direct containers transparent
var checkboxes = checkboxPanel.querySelectorAll('input[type="checkbox"]');
checkboxes.forEach(function(checkbox) {
var parent = checkbox.parentElement;
if (parent) parent.style.backgroundColor = 'transparent';
checkbox.style.backgroundColor = 'transparent';
// Find and style the associated label to be black
var label = checkbox.nextElementSibling;
if (label && label.tagName === 'LABEL') {
label.style.color = '#000000';
}
// Also find any span elements that might contain the label text
var spans = parent.querySelectorAll('span');
spans.forEach(function(span) {
span.style.color = '#000000';
});
// Find label elements in the parent container
var labels = parent.querySelectorAll('label');
labels.forEach(function(label) {
label.style.color = '#000000';
});
// Apply custom styling for the checkbox to show orange checkmark
if (checkbox.checked) {
checkbox.style.position = 'relative';
checkbox.style.appearance = 'none';
checkbox.style.backgroundColor = '#F0F0F0';
checkbox.style.border = '1px solid #CCCCCC';
checkbox.style.borderRadius = '3px';
// Create or update the checkmark element
var checkmark = checkbox.querySelector('.orange-checkmark');
if (!checkmark) {
checkmark = document.createElement('span');
checkmark.className = 'orange-checkmark';
checkmark.style.position = 'absolute';
checkmark.style.left = '50%';
checkmark.style.top = '50%';
checkmark.style.transform = 'translate(-50%, -50%)';
checkmark.style.color = '#c34700';
checkmark.style.fontSize = '14px';
checkmark.style.fontWeight = 'bold';
checkmark.innerText = '✓';
checkbox.appendChild(checkmark);
}
}
});
}
var searchPanel = document.getElementById('search-panel');
if (searchPanel) {
searchPanel.style.backgroundColor = '#F0F0F0';
// Only make search input and its direct container transparent
var searchInput = searchPanel.querySelector('input[type="text"]');
if (searchInput) {
var parent = searchInput.parentElement;
if (parent) parent.style.backgroundColor = 'transparent';
searchInput.style.backgroundColor = '#FFFFFF';
// Ensure the border is visible and matches text color
searchInput.style.border = '2px solid #000000';
searchInput.style.color = '#000000';
}
// Make sure the label is black
var searchLabels = searchPanel.querySelectorAll('label, .label-wrap, .label-wrap span');
searchLabels.forEach(function(label) {
label.style.color = '#000000';
});
// Target the specific span element that contains the label text
var blockInfoSpans = document.querySelectorAll('span[data-testid="block-info"]');
blockInfoSpans.forEach(function(span) {
span.style.color = '#000000';
});
// Also target elements with the svelte class
var svelteElements = document.querySelectorAll('.svelte-1gfkn6j');
svelteElements.forEach(function(element) {
if (element.textContent.includes('Search models')) {
element.style.color = '#000000';
}
});
}
}
// Run on page load and every second for 3 seconds to catch any delayed rendering
setTimeout(fixBackgrounds, 500);
setTimeout(fixBackgrounds, 1000);
setTimeout(fixBackgrounds, 2000);
</script>
""")
    # Static description of the three variants that can be toggled below.
    gr.HTML('''
<div class="variants_container">
<div class="variants_title">Variants:</div>
<ul style="list-style: none; padding: 0; margin: 8px 0;">
<li>mwoz: Baseline variant.</li>
<li>tau-airline: Airline specialty variant.</li>
<li>tau-retail: Retail specialty variant.</li>
</ul>
<p>Use the checkboxes below to select which variants to include. At least one variant must be active.</p>
</div>
''')
    # Variant selection; all three variants start enabled. The elem_id and
    # elem_classes values are the hooks used by the CSS and the script above.
    with gr.Row(elem_classes="checkbox-panel", elem_id="checkbox-panel"):
        cb_mwoz = gr.Checkbox(label="mwoz", value=True)
        cb_tau_airline = gr.Checkbox(label="tau-airline", value=True)
        cb_tau_retail = gr.Checkbox(label="tau-retail", value=True)
    # Free-text filter applied to the combined model name.
    with gr.Row(elem_classes="search-panel", elem_id="search-panel"):
        search_input = gr.Textbox(
            label="Search models",
            placeholder="Type to filter…",
            elem_classes="search-input",
            elem_id="search-input"
        )
    # Current sort column and direction, kept between events.
    hidden_sort_state = gr.State(value={"sort_by": "Average Score", "ascending": False})
    # Add sorting buttons
    gr.Markdown("### Sort by:")
    with gr.Row():
        # The initial label carries "▼" because the default state sorts by
        # "Average Score" in descending order.
        btn_avg = gr.Button("Average Score ▼")
        btn_conv = gr.Button("Conversation Consistency")
        btn_backend = gr.Button("Backend Consistency")
        btn_policy = gr.Button("Policy Completeness")
    leaderboard_display = gr.HTML(label="Aggregated Model Rankings")

    # Function to toggle sort state and update button labels
    def toggle_sort(column, current_state, btn_avg, btn_conv, btn_backend, btn_policy):
        """Return the new sort state followed by the four button labels.

        Clicking the column that is already active flips the direction; any
        other column starts in descending order. Only the active column's
        label carries a direction arrow. The ``btn_*`` parameters receive
        the values of the button input components and are not used.
        """
        # Default new state - flip direction if same column, otherwise default to descending
        if isinstance(current_state, dict) and current_state.get("sort_by") == column:
            new_ascending = not current_state.get("ascending", False)
        else:
            new_ascending = False
        new_state = {"sort_by": column, "ascending": new_ascending}
        # Update button labels
        direction = "▲" if new_ascending else "▼"
        avg_label = f"Average Score {direction}" if column == "Average Score" else "Average Score"
        conv_label = f"Conversation Consistency {direction}" if column == "Conversation Consistency" else "Conversation Consistency"
        backend_label = f"Backend Consistency {direction}" if column == "Backend Consistency" else "Backend Consistency"
        policy_label = f"Policy Completeness {direction}" if column == "Policy Completeness" else "Policy Completeness"
        return new_state, avg_label, conv_label, backend_label, policy_label

    # Connect sort buttons with the toggle function.
    # Each click first updates the sort state and the button labels, then
    # re-renders the table from the updated state.
    # NOTE(review): every handler creates an invisible gr.Textbox solely to pass
    # the column name as the first argument of toggle_sort.
    btn_avg.click(
        fn=toggle_sort,
        inputs=[gr.Textbox(value="Average Score", visible=False), hidden_sort_state, btn_avg, btn_conv, btn_backend, btn_policy],
        outputs=[hidden_sort_state, btn_avg, btn_conv, btn_backend, btn_policy]
    ).then(
        fn=update_leaderboard,
        inputs=[cb_mwoz, cb_tau_airline, cb_tau_retail, hidden_sort_state, search_input],
        outputs=leaderboard_display
    )
    btn_conv.click(
        fn=toggle_sort,
        inputs=[gr.Textbox(value="Conversation Consistency", visible=False), hidden_sort_state, btn_avg, btn_conv, btn_backend, btn_policy],
        outputs=[hidden_sort_state, btn_avg, btn_conv, btn_backend, btn_policy]
    ).then(
        fn=update_leaderboard,
        inputs=[cb_mwoz, cb_tau_airline, cb_tau_retail, hidden_sort_state, search_input],
        outputs=leaderboard_display
    )
    btn_backend.click(
        fn=toggle_sort,
        inputs=[gr.Textbox(value="Backend Consistency", visible=False), hidden_sort_state, btn_avg, btn_conv, btn_backend, btn_policy],
        outputs=[hidden_sort_state, btn_avg, btn_conv, btn_backend, btn_policy]
    ).then(
        fn=update_leaderboard,
        inputs=[cb_mwoz, cb_tau_airline, cb_tau_retail, hidden_sort_state, search_input],
        outputs=leaderboard_display
    )
    btn_policy.click(
        fn=toggle_sort,
        inputs=[gr.Textbox(value="Policy Completeness", visible=False), hidden_sort_state, btn_avg, btn_conv, btn_backend, btn_policy],
        outputs=[hidden_sort_state, btn_avg, btn_conv, btn_backend, btn_policy]
    ).then(
        fn=update_leaderboard,
        inputs=[cb_mwoz, cb_tau_airline, cb_tau_retail, hidden_sort_state, search_input],
        outputs=leaderboard_display
    )
    # Connect dataflow for variant checkboxes and search: any change re-renders
    # the table with the same five inputs.
    cb_mwoz.change(fn=update_leaderboard, inputs=[cb_mwoz, cb_tau_airline, cb_tau_retail, hidden_sort_state, search_input], outputs=leaderboard_display)
    cb_tau_airline.change(fn=update_leaderboard, inputs=[cb_mwoz, cb_tau_airline, cb_tau_retail, hidden_sort_state, search_input], outputs=leaderboard_display)
    cb_tau_retail.change(fn=update_leaderboard, inputs=[cb_mwoz, cb_tau_airline, cb_tau_retail, hidden_sort_state, search_input], outputs=leaderboard_display)
    search_input.change(fn=update_leaderboard, inputs=[cb_mwoz, cb_tau_airline, cb_tau_retail, hidden_sort_state, search_input], outputs=leaderboard_display)
    # Render the initial table through the app's load event.
    demo.load(fn=update_leaderboard, inputs=[cb_mwoz, cb_tau_airline, cb_tau_retail, hidden_sort_state, search_input], outputs=leaderboard_display)
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()