Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -91,12 +91,9 @@ def _format_value_minimal(v) -> str:
|
|
91 |
if isinstance(v, (float, np.floating)):
|
92 |
if abs(v - round(v)) < 1e-9:
|
93 |
return str(int(round(v)))
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
return str(v)
|
98 |
-
except Exception:
|
99 |
-
return ""
|
100 |
|
101 |
|
102 |
def _prepare_dataframe(json_path: str) -> pd.DataFrame:
|
@@ -231,6 +228,89 @@ def build_view(json_path: str, name_filter: str = "", param_bins: list[str] | No
|
|
231 |
return table_value
|
232 |
|
233 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
234 |
def ui() -> gr.Blocks:
|
235 |
with gr.Blocks(title="Model Leaderboard") as demo:
|
236 |
gr.Markdown("""
|
@@ -250,26 +330,38 @@ def ui() -> gr.Blocks:
|
|
250 |
value=[],
|
251 |
info="Select one or more bins"
|
252 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
253 |
|
254 |
# Non-interactive so Pandas Styler is respected; header sorting remains available
|
255 |
leaderboard_out = gr.Dataframe(label="Leaderboard", interactive=False)
|
256 |
|
257 |
demo.load(
|
258 |
-
fn=
|
259 |
-
inputs=[json_path_state, name_filter_in, param_bins_in],
|
260 |
-
outputs=[leaderboard_out],
|
261 |
)
|
262 |
|
263 |
# Recompute table on filter changes
|
264 |
name_filter_in.change(
|
265 |
-
fn=
|
266 |
-
inputs=[json_path_state, name_filter_in, param_bins_in],
|
267 |
-
outputs=[leaderboard_out],
|
268 |
)
|
269 |
param_bins_in.change(
|
270 |
-
fn=
|
271 |
-
inputs=[json_path_state, name_filter_in, param_bins_in],
|
272 |
-
outputs=[leaderboard_out],
|
|
|
|
|
|
|
|
|
|
|
273 |
)
|
274 |
|
275 |
gr.Markdown("""
|
|
|
91 |
if isinstance(v, (float, np.floating)):
|
92 |
if abs(v - round(v)) < 1e-9:
|
93 |
return str(int(round(v)))
|
94 |
+
s = f"{float(v):.6f}".rstrip("0").rstrip(".")
|
95 |
+
return s
|
96 |
+
|
|
|
|
|
|
|
97 |
|
98 |
|
99 |
def _prepare_dataframe(json_path: str) -> pd.DataFrame:
|
|
|
228 |
return table_value
|
229 |
|
230 |
|
231 |
+
def build_view_and_tasks(
    json_path: str,
    name_filter: str = "",
    param_bins: list[str] | None = None,
    excluded_tasks: list[str] | None = None,
):
    """Build the leaderboard table and the update for the exclude-tasks control.

    Every column of the prepared dataframe that is not one of the meta columns
    ("Model", "Provider", "Parameters", "β", "avg_score") is treated as a task.
    Tasks listed in ``excluded_tasks`` are dropped from the view, and
    ``avg_score`` is recomputed as the row-wise mean of the remaining task
    columns (non-numeric cells are coerced to NaN and skipped), rounded to two
    decimals.

    Args:
        json_path: Path handed to ``_prepare_dataframe``.
        name_filter: Forwarded to ``_apply_filters``.
        param_bins: Forwarded to ``_apply_filters``.
        excluded_tasks: Task column names to hide; ``None`` or empty hides
            nothing. Names that are not task columns are ignored.

    Returns:
        A ``(table_value, tasks_update)`` pair. ``table_value`` is a pandas
        Styler over the resulting dataframe, or an empty ``pd.DataFrame`` when
        no rows remain. ``tasks_update`` is ``gr.update(...)`` carrying all task
        names as choices and the valid exclusions as the selected value.
    """
    meta_names = ("Model", "Provider", "Parameters", "β", "avg_score")

    df = _prepare_dataframe(json_path)

    # Every non-meta column counts as a task (determined before exclusion).
    tasks_all = [c for c in df.columns if c not in meta_names]

    excluded_set = set(excluded_tasks or [])
    # Walk tasks_all rather than the set so the selection echoed back to the
    # checkbox group follows column order instead of arbitrary set order.
    excluded_valid = [t for t in tasks_all if t in excluded_set]
    included_tasks = [t for t in tasks_all if t not in excluded_set]

    # Rows must have values for the required tasks, but only for those that
    # are still part of the view.
    required_cols = [
        c for c in ("src_clf", "sum_rag", "sum_rag_v2") if c in included_tasks
    ]
    if required_cols:
        df = df.dropna(subset=required_cols, axis=0)

    df = _apply_filters(df, name_filter=name_filter, param_bins=param_bins)

    # Hide the excluded task columns.
    if excluded_valid:
        df = df.drop(
            columns=[c for c in excluded_valid if c in df.columns],
            errors="ignore",
        )

    # Metric columns are whatever non-meta columns survive the steps above.
    current_metric_cols = [c for c in df.columns if c not in meta_names]

    # Discard any stale average; it is recomputed from the visible tasks only.
    df = df.drop(columns=["avg_score"], errors="ignore")

    if current_metric_cols:
        numeric_df = pd.DataFrame(
            {c: pd.to_numeric(df[c], errors="coerce") for c in current_metric_cols}
        )
        df["avg_score"] = numeric_df.mean(axis=1, skipna=True).round(2)
    else:
        # No metrics left; keep the column so the schema stays consistent.
        df["avg_score"] = np.nan

    # Best average first; rows without an average go last.
    df = df.sort_values(by="avg_score", ascending=False, na_position="last")

    # Identity columns first, then avg_score, then everything else.
    preferred_order = [c for c in ("Model", "Provider", "Parameters") if c in df.columns]
    remaining_cols = [c for c in df.columns if c not in preferred_order]
    if "avg_score" in remaining_cols:
        remaining_cols = ["avg_score"] + [c for c in remaining_cols if c != "avg_score"]
    if preferred_order:
        df = df[preferred_order + remaining_cols]

    # Ensure the separator column exists right after Parameters.
    if "Parameters" in df.columns and "β" not in df.columns:
        insert_at = df.columns.get_loc("Parameters") + 1
        df.insert(insert_at, "β", "")

    # Style for display; an empty result is returned as a bare DataFrame.
    if not df.empty:
        styler = df.style
        if "Parameters" in df.columns:
            styler = styler.apply(_style_parameters, subset=["Parameters"])  # type: ignore
        styler = styler.format(_format_value_minimal)
        table_value: object = styler
    else:
        table_value = pd.DataFrame()

    # Update object for the exclude-tasks checkbox group.
    tasks_update = gr.update(choices=tasks_all, value=excluded_valid)

    return table_value, tasks_update
|
312 |
+
|
313 |
+
|
314 |
def ui() -> gr.Blocks:
|
315 |
with gr.Blocks(title="Model Leaderboard") as demo:
|
316 |
gr.Markdown("""
|
|
|
330 |
value=[],
|
331 |
info="Select one or more bins"
|
332 |
)
|
333 |
+
# Task exclusion control (choices are populated dynamically)
|
334 |
+
excluded_tasks_in = gr.CheckboxGroup(
|
335 |
+
label="Exclude tasks",
|
336 |
+
choices=[],
|
337 |
+
value=[],
|
338 |
+
info="Select tasks to hide; all are shown by default",
|
339 |
+
)
|
340 |
|
341 |
# Non-interactive so Pandas Styler is respected; header sorting remains available
|
342 |
leaderboard_out = gr.Dataframe(label="Leaderboard", interactive=False)
|
343 |
|
344 |
demo.load(
|
345 |
+
fn=build_view_and_tasks,
|
346 |
+
inputs=[json_path_state, name_filter_in, param_bins_in, excluded_tasks_in],
|
347 |
+
outputs=[leaderboard_out, excluded_tasks_in],
|
348 |
)
|
349 |
|
350 |
# Recompute table on filter changes
|
351 |
name_filter_in.change(
|
352 |
+
fn=build_view_and_tasks,
|
353 |
+
inputs=[json_path_state, name_filter_in, param_bins_in, excluded_tasks_in],
|
354 |
+
outputs=[leaderboard_out, excluded_tasks_in],
|
355 |
)
|
356 |
param_bins_in.change(
|
357 |
+
fn=build_view_and_tasks,
|
358 |
+
inputs=[json_path_state, name_filter_in, param_bins_in, excluded_tasks_in],
|
359 |
+
outputs=[leaderboard_out, excluded_tasks_in],
|
360 |
+
)
|
361 |
+
excluded_tasks_in.change(
|
362 |
+
fn=build_view_and_tasks,
|
363 |
+
inputs=[json_path_state, name_filter_in, param_bins_in, excluded_tasks_in],
|
364 |
+
outputs=[leaderboard_out, excluded_tasks_in],
|
365 |
)
|
366 |
|
367 |
gr.Markdown("""
|