Linker1907 committed on
Commit
2e5b82a
·
1 Parent(s): 8964485

add mascot and starred bench from lighteval metadata

Browse files
Files changed (1) hide show
  1. app.py +13 -47
app.py CHANGED
@@ -17,23 +17,6 @@ registry = Registry(custom_tasks=None, load_multilingual=True)
17
  modules_data = registry.get_tasks_dump()
18
 
19
 
20
- star_benchmarks = [
21
- "aime",
22
- "mmlu_pro",
23
- "gpqa",
24
- "hle",
25
- "arc_agi_2",
26
- "ifbench",
27
- "ifeval",
28
- "live code bench",
29
- "math 500",
30
- "mix_eval",
31
- "musr",
32
- "simpleqa",
33
- "MMLU pro"
34
- ]
35
-
36
-
37
  @dataclass
38
  class TaskDoc:
39
  module: str
@@ -44,6 +27,7 @@ class TaskDoc:
44
  dataset: str | None
45
  name: str | None = None
46
  task_names: list[str] = field(default_factory=list)
 
47
 
48
 
49
  def _module_to_github_path(module: str) -> str:
@@ -70,6 +54,7 @@ def index_tasks() -> tuple[list[TaskDoc], list[str], list[str]]:
70
  tgs = [t.lower() for t in docstring.get("tags", [])]
71
  paper = docstring.get("paper", "").strip() or None
72
  name = docstring.get("name", "").strip() or None
 
73
 
74
  # Convert dataset array to comma-separated string
75
  dataset_list = docstring.get("dataset", [])
@@ -93,7 +78,8 @@ def index_tasks() -> tuple[list[TaskDoc], list[str], list[str]]:
93
  paper=paper,
94
  dataset=dataset,
95
  name=name,
96
- task_names=task_names
 
97
  ))
98
 
99
  languages_sorted = [
@@ -112,33 +98,6 @@ def normalize_name_for_matching(name: str) -> str:
112
  return re.sub(r"[_\s:]+", "", name.lower())
113
 
114
 
115
- def is_starred_benchmark(td: TaskDoc) -> bool:
116
- """Check if task is a starred benchmark."""
117
- module_parts = td.module.split(".")
118
- base_no_ext = module_parts[-1] if module_parts else ""
119
- fallback_name = module_parts[-2] if base_no_ext == "main" and len(module_parts) >= 2 else base_no_ext
120
-
121
- task_name_raw = (td.name or "").lower().strip()
122
- task_name_display = (td.name or fallback_name).replace("_", " ").lower().strip()
123
- normalized_task_display = normalize_name_for_matching(task_name_display)
124
- normalized_module = normalize_name_for_matching(base_no_ext)
125
- normalized_name = normalize_name_for_matching(task_name_raw)
126
- normalized_dataset = normalize_name_for_matching(td.dataset or "")
127
-
128
- for star_name in star_benchmarks:
129
- normalized_star = normalize_name_for_matching(star_name)
130
- if (normalized_star == normalized_task_display or
131
- normalized_star == normalized_module or
132
- normalized_star == normalized_name or
133
- normalized_star in normalized_task_display or
134
- normalized_star in normalized_module or
135
- (normalized_dataset and normalized_star in normalized_dataset) or
136
- star_name.lower() in task_name_display or
137
- star_name.lower() in base_no_ext.lower()):
138
- return True
139
- return False
140
-
141
-
142
  def filter_tasks(languages: list[str], tags: list[str], search: str) -> list[TaskDoc]:
143
  """Filter tasks by languages, tags, and search query."""
144
  selected_langs = [lang.lower() for lang in (languages or [])]
@@ -157,7 +116,7 @@ def filter_tasks(languages: list[str], tags: list[str], search: str) -> list[Tas
157
  continue
158
  out.append(td)
159
 
160
- out.sort(key=lambda td: (not is_starred_benchmark(td), (td.name or td.module).lower()))
161
  return out
162
 
163
 
@@ -240,7 +199,7 @@ def render_cards(tasks: list[TaskDoc]) -> str:
240
  if len(datasets) > 6:
241
  dataset_links.append(f'<span class="dataset-more">+{len(datasets) - 6} more</span>')
242
  dataset_html = " ".join(dataset_links) if dataset_links else ""
243
- star_icon = "⭐ " if is_starred_benchmark(t) else ""
244
 
245
  # Display evaluation task names (max 3 visible, with dropdown for more)
246
  # Group task names by prefix to collapse shared prefixes
@@ -697,6 +656,13 @@ with gr.Blocks(title="Lighteval Tasks Explorer", css=custom_css) as demo:
697
  show_tags_filters = gr.Checkbox(label="Show tag checkboxes", value=False)
698
  tag_dd = gr.CheckboxGroup(choices=ALL_TAGS, value=[], visible=False)
699
  gr.Markdown("Tip: use the filters and search together. Results update live.")
 
 
 
 
 
 
 
700
 
701
  with gr.Column(scale=5):
702
  cards = gr.HTML()
 
17
  modules_data = registry.get_tasks_dump()
18
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  @dataclass
21
  class TaskDoc:
22
  module: str
 
27
  dataset: str | None
28
  name: str | None = None
29
  task_names: list[str] = field(default_factory=list)
30
+ starred: bool = False
31
 
32
 
33
  def _module_to_github_path(module: str) -> str:
 
54
  tgs = [t.lower() for t in docstring.get("tags", [])]
55
  paper = docstring.get("paper", "").strip() or None
56
  name = docstring.get("name", "").strip() or None
57
+ starred = docstring.get("starred", False)
58
 
59
  # Convert dataset array to comma-separated string
60
  dataset_list = docstring.get("dataset", [])
 
78
  paper=paper,
79
  dataset=dataset,
80
  name=name,
81
+ task_names=task_names,
82
+ starred=starred
83
  ))
84
 
85
  languages_sorted = [
 
98
  return re.sub(r"[_\s:]+", "", name.lower())
99
 
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  def filter_tasks(languages: list[str], tags: list[str], search: str) -> list[TaskDoc]:
102
  """Filter tasks by languages, tags, and search query."""
103
  selected_langs = [lang.lower() for lang in (languages or [])]
 
116
  continue
117
  out.append(td)
118
 
119
+ out.sort(key=lambda td: (not td.starred, (td.name or td.module).lower()))
120
  return out
121
 
122
 
 
199
  if len(datasets) > 6:
200
  dataset_links.append(f'<span class="dataset-more">+{len(datasets) - 6} more</span>')
201
  dataset_html = " ".join(dataset_links) if dataset_links else ""
202
+ star_icon = "⭐ " if t.starred else ""
203
 
204
  # Display evaluation task names (max 3 visible, with dropdown for more)
205
  # Group task names by prefix to collapse shared prefixes
 
656
  show_tags_filters = gr.Checkbox(label="Show tag checkboxes", value=False)
657
  tag_dd = gr.CheckboxGroup(choices=ALL_TAGS, value=[], visible=False)
658
  gr.Markdown("Tip: use the filters and search together. Results update live.")
659
+ gr.Image(
660
+ value="measuring_model_size.png",
661
+ label="",
662
+ show_label=False,
663
+ container=False,
664
+ show_download_button=False
665
+ )
666
 
667
  with gr.Column(scale=5):
668
  cards = gr.HTML()