Spaces:
Running
Running
Ludwig Stumpp
committed on
Commit
·
1c71762
1
Parent(s):
412a418
Fix app
Browse files
- poetry.lock +0 -0
- pyproject.toml +14 -0
- requirements-dev.txt +0 -4
- requirements.txt +0 -2
- setup.cfg +2 -0
- streamlit_app.py +35 -13
poetry.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pyproject.toml
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[tool.poetry]
|
| 2 |
+
package-mode = false
|
| 3 |
+
description = ""
|
| 4 |
+
authors = ["Ludwig Stumpp <[email protected]>"]
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
|
| 7 |
+
[tool.poetry.dependencies]
|
| 8 |
+
python = "^3.10"
|
| 9 |
+
pandas = "^2.2.2"
|
| 10 |
+
streamlit = "^1.37.1"
|
| 11 |
+
|
| 12 |
+
[build-system]
|
| 13 |
+
requires = ["poetry-core"]
|
| 14 |
+
build-backend = "poetry.core.masonry.api"
|
requirements-dev.txt
DELETED
|
@@ -1,4 +0,0 @@
|
|
| 1 |
-
black
|
| 2 |
-
flake
|
| 3 |
-
isort
|
| 4 |
-
mypy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
DELETED
|
@@ -1,2 +0,0 @@
|
|
| 1 |
-
pandas~=2.0.1
|
| 2 |
-
streamlit~=1.22.0
|
|
|
|
|
|
|
|
|
setup.cfg
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[flake8]
|
| 2 |
+
max-line-length = 88
|
streamlit_app.py
CHANGED
|
@@ -4,7 +4,8 @@ from collections.abc import Iterable
|
|
| 4 |
|
| 5 |
import pandas as pd
|
| 6 |
import streamlit as st
|
| 7 |
-
from pandas.api.types import is_bool_dtype, is_datetime64_any_dtype,
|
|
|
|
| 8 |
|
| 9 |
GITHUB_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
|
| 10 |
NON_BENCHMARK_COLS = ["Open?", "Publisher"]
|
|
@@ -22,11 +23,13 @@ def extract_table_and_format_from_markdown_text(markdown_table: str) -> pd.DataF
|
|
| 22 |
df = (
|
| 23 |
pd.read_table(io.StringIO(markdown_table), sep="|", header=0, index_col=1)
|
| 24 |
.dropna(axis=1, how="all") # drop empty columns
|
| 25 |
-
.iloc[
|
|
|
|
|
|
|
| 26 |
.sort_index(ascending=True)
|
| 27 |
.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
|
| 28 |
.replace("", float("NaN"))
|
| 29 |
-
.
|
| 30 |
)
|
| 31 |
|
| 32 |
# remove whitespace from column names and index
|
|
@@ -37,7 +40,9 @@ def extract_table_and_format_from_markdown_text(markdown_table: str) -> pd.DataF
|
|
| 37 |
return df
|
| 38 |
|
| 39 |
|
| 40 |
-
def extract_markdown_table_from_multiline(
|
|
|
|
|
|
|
| 41 |
"""Extracts the markdown table from a multiline string.
|
| 42 |
|
| 43 |
Args:
|
|
@@ -89,7 +94,9 @@ def remove_markdown_links(text: str) -> str:
|
|
| 89 |
return text
|
| 90 |
|
| 91 |
|
| 92 |
-
def filter_dataframe_by_row_and_columns(
|
|
|
|
|
|
|
| 93 |
"""
|
| 94 |
Filter dataframe by the rows and columns to display.
|
| 95 |
|
|
@@ -116,7 +123,8 @@ def filter_dataframe_by_row_and_columns(df: pd.DataFrame, ignore_columns: list[s
|
|
| 116 |
df = pd.DataFrame(df.loc[to_filter_index])
|
| 117 |
|
| 118 |
to_filter_columns = st.multiselect(
|
| 119 |
-
"Filter by benchmark:",
|
|
|
|
| 120 |
)
|
| 121 |
if to_filter_columns:
|
| 122 |
df = pd.DataFrame(df[ignore_columns + to_filter_columns])
|
|
@@ -173,7 +181,9 @@ def filter_dataframe_by_column_values(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 173 |
),
|
| 174 |
)
|
| 175 |
if isinstance(user_date_input, Iterable) and len(user_date_input) == 2:
|
| 176 |
-
user_date_input_datetime = tuple(
|
|
|
|
|
|
|
| 177 |
start_date, end_date = user_date_input_datetime
|
| 178 |
df = df.loc[df[column].between(start_date, end_date)]
|
| 179 |
|
|
@@ -207,22 +217,30 @@ def setup_basic():
|
|
| 207 |
|
| 208 |
|
| 209 |
def setup_leaderboard(readme: str):
|
| 210 |
-
leaderboard_table = extract_markdown_table_from_multiline(
|
|
|
|
|
|
|
| 211 |
leaderboard_table = remove_markdown_links(leaderboard_table)
|
| 212 |
df_leaderboard = extract_table_and_format_from_markdown_text(leaderboard_table)
|
| 213 |
-
df_leaderboard["Open?"] =
|
|
|
|
|
|
|
| 214 |
|
| 215 |
st.markdown("## Leaderboard")
|
| 216 |
modify = st.checkbox("Add filters")
|
| 217 |
clear_empty_entries = st.checkbox("Clear empty entries", value=True)
|
| 218 |
|
| 219 |
if modify:
|
| 220 |
-
df_leaderboard = filter_dataframe_by_row_and_columns(
|
|
|
|
|
|
|
| 221 |
df_leaderboard = filter_dataframe_by_column_values(df_leaderboard)
|
| 222 |
|
| 223 |
if clear_empty_entries:
|
| 224 |
df_leaderboard = df_leaderboard.dropna(axis=1, how="all")
|
| 225 |
-
benchmark_columns = [
|
|
|
|
|
|
|
| 226 |
rows_wo_any_benchmark = df_leaderboard[benchmark_columns].isna().all(axis=1)
|
| 227 |
df_leaderboard = df_leaderboard[~rows_wo_any_benchmark]
|
| 228 |
|
|
@@ -246,12 +264,16 @@ def setup_leaderboard(readme: str):
|
|
| 246 |
|
| 247 |
|
| 248 |
def setup_benchmarks(readme: str):
|
| 249 |
-
benchmarks_table = extract_markdown_table_from_multiline(
|
|
|
|
|
|
|
| 250 |
df_benchmarks = extract_table_and_format_from_markdown_text(benchmarks_table)
|
| 251 |
|
| 252 |
st.markdown("## Covered Benchmarks")
|
| 253 |
|
| 254 |
-
selected_benchmark = st.selectbox(
|
|
|
|
|
|
|
| 255 |
df_selected = df_benchmarks.loc[selected_benchmark]
|
| 256 |
text = [
|
| 257 |
f"Name: {selected_benchmark}",
|
|
|
|
| 4 |
|
| 5 |
import pandas as pd
|
| 6 |
import streamlit as st
|
| 7 |
+
from pandas.api.types import (is_bool_dtype, is_datetime64_any_dtype,
|
| 8 |
+
is_numeric_dtype)
|
| 9 |
|
| 10 |
GITHUB_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
|
| 11 |
NON_BENCHMARK_COLS = ["Open?", "Publisher"]
|
|
|
|
| 23 |
df = (
|
| 24 |
pd.read_table(io.StringIO(markdown_table), sep="|", header=0, index_col=1)
|
| 25 |
.dropna(axis=1, how="all") # drop empty columns
|
| 26 |
+
.iloc[
|
| 27 |
+
1:
|
| 28 |
+
] # drop first row which is the "----" separator of the original markdown table
|
| 29 |
.sort_index(ascending=True)
|
| 30 |
.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
|
| 31 |
.replace("", float("NaN"))
|
| 32 |
+
.apply(pd.to_numeric, errors="ignore")
|
| 33 |
)
|
| 34 |
|
| 35 |
# remove whitespace from column names and index
|
|
|
|
| 40 |
return df
|
| 41 |
|
| 42 |
|
| 43 |
+
def extract_markdown_table_from_multiline(
|
| 44 |
+
multiline: str, table_headline: str, next_headline_start: str = "#"
|
| 45 |
+
) -> str:
|
| 46 |
"""Extracts the markdown table from a multiline string.
|
| 47 |
|
| 48 |
Args:
|
|
|
|
| 94 |
return text
|
| 95 |
|
| 96 |
|
| 97 |
+
def filter_dataframe_by_row_and_columns(
|
| 98 |
+
df: pd.DataFrame, ignore_columns: list[str] | None = None
|
| 99 |
+
) -> pd.DataFrame:
|
| 100 |
"""
|
| 101 |
Filter dataframe by the rows and columns to display.
|
| 102 |
|
|
|
|
| 123 |
df = pd.DataFrame(df.loc[to_filter_index])
|
| 124 |
|
| 125 |
to_filter_columns = st.multiselect(
|
| 126 |
+
"Filter by benchmark:",
|
| 127 |
+
sorted([c for c in df.columns if c not in ignore_columns]),
|
| 128 |
)
|
| 129 |
if to_filter_columns:
|
| 130 |
df = pd.DataFrame(df[ignore_columns + to_filter_columns])
|
|
|
|
| 181 |
),
|
| 182 |
)
|
| 183 |
if isinstance(user_date_input, Iterable) and len(user_date_input) == 2:
|
| 184 |
+
user_date_input_datetime = tuple(
|
| 185 |
+
map(pd.to_datetime, user_date_input)
|
| 186 |
+
)
|
| 187 |
start_date, end_date = user_date_input_datetime
|
| 188 |
df = df.loc[df[column].between(start_date, end_date)]
|
| 189 |
|
|
|
|
| 217 |
|
| 218 |
|
| 219 |
def setup_leaderboard(readme: str):
|
| 220 |
+
leaderboard_table = extract_markdown_table_from_multiline(
|
| 221 |
+
readme, table_headline="## Leaderboard"
|
| 222 |
+
)
|
| 223 |
leaderboard_table = remove_markdown_links(leaderboard_table)
|
| 224 |
df_leaderboard = extract_table_and_format_from_markdown_text(leaderboard_table)
|
| 225 |
+
df_leaderboard["Open?"] = (
|
| 226 |
+
df_leaderboard["Open?"].map({"yes": 1, "no": 0}).astype(bool)
|
| 227 |
+
)
|
| 228 |
|
| 229 |
st.markdown("## Leaderboard")
|
| 230 |
modify = st.checkbox("Add filters")
|
| 231 |
clear_empty_entries = st.checkbox("Clear empty entries", value=True)
|
| 232 |
|
| 233 |
if modify:
|
| 234 |
+
df_leaderboard = filter_dataframe_by_row_and_columns(
|
| 235 |
+
df_leaderboard, ignore_columns=NON_BENCHMARK_COLS
|
| 236 |
+
)
|
| 237 |
df_leaderboard = filter_dataframe_by_column_values(df_leaderboard)
|
| 238 |
|
| 239 |
if clear_empty_entries:
|
| 240 |
df_leaderboard = df_leaderboard.dropna(axis=1, how="all")
|
| 241 |
+
benchmark_columns = [
|
| 242 |
+
c for c in df_leaderboard.columns if df_leaderboard[c].dtype == float
|
| 243 |
+
]
|
| 244 |
rows_wo_any_benchmark = df_leaderboard[benchmark_columns].isna().all(axis=1)
|
| 245 |
df_leaderboard = df_leaderboard[~rows_wo_any_benchmark]
|
| 246 |
|
|
|
|
| 264 |
|
| 265 |
|
| 266 |
def setup_benchmarks(readme: str):
|
| 267 |
+
benchmarks_table = extract_markdown_table_from_multiline(
|
| 268 |
+
readme, table_headline="## Benchmarks"
|
| 269 |
+
)
|
| 270 |
df_benchmarks = extract_table_and_format_from_markdown_text(benchmarks_table)
|
| 271 |
|
| 272 |
st.markdown("## Covered Benchmarks")
|
| 273 |
|
| 274 |
+
selected_benchmark = st.selectbox(
|
| 275 |
+
"Select a benchmark to learn more:", df_benchmarks.index.unique()
|
| 276 |
+
)
|
| 277 |
df_selected = df_benchmarks.loc[selected_benchmark]
|
| 278 |
text = [
|
| 279 |
f"Name: {selected_benchmark}",
|