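# NOTE: the next lines appear to be page-header text captured from the hosting
# site's file viewer; they are kept as comments so the module remains valid Python.
# loodvanniekerkginkgo's picture
# Added more text and italicized multiline FAQs
# 95a9631
# raw
# history blame
# 11.3 kB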
import time
import pandas as pd
import gradio as gr
from gradio.themes.utils import sizes
from gradio_leaderboard import Leaderboard
from dotenv import load_dotenv
load_dotenv() # Load environment variables from .env file (before imports)
from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INSTRUCTIONS, WEBSITE_HEADER
from constants import (
ASSAY_RENAME, # noqa: F401
SEQUENCES_FILE_DICT,
LEADERBOARD_DISPLAY_COLUMNS,
ABOUT_TAB_NAME,
FAQ_TAB_NAME,
TERMS_URL,
LEADERBOARD_COLUMNS_RENAME,
LEADERBOARD_COLUMNS_RENAME_LIST,
SUBMIT_TAB_NAME,
SLACK_URL,
)
from submit import make_submission
from utils import fetch_hf_results, show_output_box, periodic_data_fetch
def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
    """
    Turn a raw results dataframe into the table shown on the leaderboard.

    Args:
        df_results: Results dataframe (per the original note, the one produced by
            utils.fetch_hf_results()). It is not modified.
        assay: When given, only rows whose "assay" equals this value are kept.

    Returns:
        A new dataframe restricted to LEADERBOARD_DISPLAY_COLUMNS, sorted by
        "spearman" (descending), with "spearman" rendered as text and the columns
        renamed through LEADERBOARD_COLUMNS_RENAME.
    """
    # Keep only the assays listed in ASSAY_RENAME.
    is_known_assay = df_results["assay"].isin(ASSAY_RENAME.keys())
    table = df_results[is_known_assay].copy()
    if assay is not None:
        table = table[table["assay"] == assay]

    # Sort while "spearman" is still numeric; it becomes text right below.
    table = table[LEADERBOARD_DISPLAY_COLUMNS].sort_values(
        by="spearman", ascending=False
    )

    # Note: this could instead be a text box under the leaderboard saying
    # "Note: Results for the Heldout Test Set are only evaluated at competition close".
    # The column is converted to str first to avoid a dtype incompatibility when
    # the explanatory text is assigned into it.
    table["spearman"] = table["spearman"].astype(str)
    heldout_without_score = (table["dataset"] == "Heldout Test Set") & (
        table["spearman"] == "nan"
    )
    table.loc[heldout_without_score, "spearman"] = "N/A, evaluated at competition close"

    # Human-readable column names for display.
    return table.rename(columns=LEADERBOARD_COLUMNS_RENAME)
def get_leaderboard_object(assay: str | None = None):
    """
    Build the Leaderboard component from the current results CSV.

    Args:
        assay: Optional assay to restrict the table to. When it is None the
            "property" column is offered as an additional filter.

    Returns:
        The constructed (and rendered) Leaderboard component.
    """
    filter_columns = ["dataset"] if assay is not None else ["dataset", "property"]

    # Bug: search_columns can't be left empty, because the component then reports
    # "Column None not found in headers".
    # Note(Lood): Would be nice to make it clear that the Search Column is searching on model name
    results = pd.read_csv("debug-current-results.csv")
    table = format_leaderboard_table(df_results=results, assay=assay)
    selectable_columns = LEADERBOARD_COLUMNS_RENAME_LIST(
        ["model", "property", "spearman", "dataset", "user"]
    )
    renamed_filter_columns = LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns)

    return Leaderboard(
        value=table,
        datatype=["str", "str", "str", "number", "str"],
        select_columns=selectable_columns,
        search_columns=["Model Name"],
        filter_columns=renamed_filter_columns,
        every=15,
        render=True,
    )
def refresh_overall_leaderboard():
    """Re-read the results CSV and return the formatted table for all assays."""
    # debug-current-results.csv is updated by the outer thread, so it is read
    # again on every call instead of being cached.
    return format_leaderboard_table(
        df_results=pd.read_csv("debug-current-results.csv")
    )
# Initialize global dataframe
# Startup sequence, executed at import time, in this order:
#   1. call fetch_hf_results() (imported from utils) — presumably this is what
#      produces debug-current-results.csv; confirm in utils,
#   2. wait a fixed 2 seconds,
#   3. read debug-current-results.csv into a module-level dataframe.
# NOTE(review): if the CSV does not exist after the pause, pd.read_csv raises and
# the module fails to import.
fetch_hf_results()
time.sleep(2) # Give the outer thread time to create the file at the start
# In the code visible here, the leaderboard helpers re-read the CSV into their own
# local variables rather than using this module-level snapshot.
current_dataframe = pd.read_csv("debug-current-results.csv")
# Build the Gradio app: a header row, four tabs (About, Leaderboard, Submit, FAQ)
# and a contact footer. Make font size bigger using gradio theme.
with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
    timer = gr.Timer(3)  # Run every 3 seconds when page is focused

    ## Header
    with gr.Row():
        with gr.Column(scale=6):  # bigger text area
            gr.Markdown(WEBSITE_HEADER)
        with gr.Column(scale=2):  # smaller side column for logo
            gr.Image(
                value="./assets/competition_logo.jpg",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                show_fullscreen_button=False,
                width="25vw",  # Take up the width of the column (2/8 = 1/4)
            )

    with gr.Tabs(elem_classes="tab-buttons"):
        ## About tab
        # NOTE(review): the About and Leaderboard tabs share the same elem_id
        # ("abdev-benchmark-tab-table"); kept unchanged here in case external
        # CSS/JS depends on it, but element ids are normally expected to be unique.
        with gr.TabItem(ABOUT_TAB_NAME, elem_id="abdev-benchmark-tab-table"):
            gr.Markdown(ABOUT_INTRO)
            gr.Image(
                value="./assets/prediction_explainer_cv.png",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                show_fullscreen_button=False,
                width="30vw",
            )
            gr.Markdown(ABOUT_TEXT)

            # Sequence download buttons
            gr.Markdown(
                """### 📥 Download Sequences
The GDPa1 dataset (with assay data and sequences) is available on Hugging Face [here](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1),
but we provide this and the private test set for convenience."""
            )
            with gr.Row():
                with gr.Column():
                    download_button_cv_about = gr.DownloadButton(
                        label="📥 Download GDPa1 sequences",
                        value=SEQUENCES_FILE_DICT["GDPa1_cross_validation"],
                        variant="secondary",
                    )
                with gr.Column():
                    download_button_test_about = gr.DownloadButton(
                        label="📥 Download Private Test Set sequences",
                        value=SEQUENCES_FILE_DICT["Heldout Test Set"],
                        variant="secondary",
                    )

        ## Leaderboard tab
        with gr.TabItem(
            "🏆 Leaderboard", elem_id="abdev-benchmark-tab-table"
        ) as leaderboard_tab:
            gr.Markdown(
                """
# Overall Leaderboard (filter below by property)
Each property has its own prize, and participants can submit models for any combination of properties.
**Note**: It is *easy to overfit* the public GDPa1 dataset, which results in artificially high Spearman correlations.
We would suggest training using cross-validation to give a better indication of the model's performance on the eventual private test set.
"""
            )
            lb = get_leaderboard_object()
            # Refresh the table on every timer tick and once when the page loads.
            timer.tick(fn=refresh_overall_leaderboard, outputs=lb)
            demo.load(fn=refresh_overall_leaderboard, outputs=lb)

        ## Submission tab
        with gr.TabItem(SUBMIT_TAB_NAME, elem_id="boundary-benchmark-tab-table"):
            gr.Markdown(SUBMIT_INSTRUCTIONS)
            with gr.Row():
                # Left column: who is submitting and what the model is.
                with gr.Column():
                    username_input = gr.Textbox(
                        label="Username",
                        placeholder="Enter your Hugging Face username",
                        info="This will be used to identify valid submissions, and to update your results if you submit again.",
                    )
                    anonymous_checkbox = gr.Checkbox(
                        label="Anonymous",
                        value=False,
                        info="If checked, your username will be replaced with an anonymous hash on the leaderboard.",
                    )
                    model_name_input = gr.Textbox(
                        label="Model Name",
                        placeholder="Enter your model name (e.g., 'MyProteinLM-v1')",
                        info="This will be displayed on the leaderboard.",
                    )
                    model_description_input = gr.Textbox(
                        label="Model Description (optional)",
                        placeholder="Brief description of your model and approach",
                        info="Describe your model, training data, or methodology.",
                        lines=3,
                    )
                    # NOTE(review): the address in this help text reads
                    # "[email protected]", which looks like an e-mail-obfuscation
                    # placeholder rather than a real address; restore the real
                    # contact address (the mailto: link is not usable as written).
                    registration_code = gr.Textbox(
                        label="Registration Code",
                        placeholder="Enter your registration code",
                        info="If you did not receive a registration code, please sign up on the <a href='https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition'>Competition Registration page</a> or email <a href='mailto:[email protected]'>[email protected]</a>.",
                    )

                # Right column: the two prediction files, each with a button to
                # download the matching sequences.
                with gr.Column():
                    gr.Markdown("### Upload Both Submission Files")

                    # GDPa1 Cross-validation file
                    gr.Markdown("**GDPa1 Cross-Validation Predictions:**")
                    download_button_cv = gr.DownloadButton(
                        label="📥 Download GDPa1 sequences",
                        value=SEQUENCES_FILE_DICT["GDPa1_cross_validation"],
                        variant="secondary",
                    )
                    submission_file_cv = gr.File(label="GDPa1 Cross-Validation CSV")

                    # Test set file
                    gr.Markdown("**Private Test Set Predictions:**")
                    download_button_test = gr.DownloadButton(
                        label="📥 Download Private Test Set sequences",
                        value=SEQUENCES_FILE_DICT["Heldout Test Set"],
                        variant="secondary",
                    )
                    submission_file_test = gr.File(label="Private Test Set CSV")

            submit_btn = gr.Button("Evaluate")
            # Status box starts hidden; the click chain below writes to it.
            message = gr.Textbox(label="Status", lines=3, visible=False)

            # First run the submission, then hand its status message to
            # show_output_box, whose result is written back to the same textbox.
            submit_btn.click(
                make_submission,
                inputs=[
                    submission_file_cv,
                    submission_file_test,
                    username_input,
                    model_name_input,
                    model_description_input,
                    anonymous_checkbox,
                    registration_code,
                ],
                outputs=[message],
            ).then(
                fn=show_output_box,
                inputs=[message],
                outputs=[message],
            )

        ## FAQ tab
        with gr.Tab(FAQ_TAB_NAME):
            gr.Markdown("# Frequently Asked Questions")
            for number, (question, answer) in enumerate(FAQS.items(), start=1):
                # Would love to make questions bold but accordion doesn't support it
                with gr.Accordion(f"{number}. {question}", open=False):
                    if isinstance(answer, list):
                        # Italicize each line. Two trailing spaces before the
                        # newline make a Markdown hard line break, so every item
                        # is rendered on its own line.
                        answer_markdown = "  \n".join(f"*{item}*" for item in answer)
                    else:
                        answer_markdown = f"*{answer}*"  # Italics for answers
                    gr.Markdown(answer_markdown)

    # Footnote
    # NOTE(review): the contact address below reads "[email protected]", which looks
    # like an e-mail-obfuscation placeholder rather than a real address; restore the
    # real contact address.
    gr.Markdown(
        f"""
<div style="text-align: center; font-size: 14px; color: gray; margin-top: 2em;">
📬 For questions or feedback, contact <a href="mailto:[email protected]">[email protected]</a> or discuss on the <a href="{SLACK_URL}">Slack community</a> co-hosted by Bits in Bio.<br>
Visit the <a href="https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition">Competition Registration page</a> to sign up for updates and to register, and see Terms <a href="{TERMS_URL}">here</a>.
</div>
""",
        elem_id="contact-footer",
    )
if __name__ == "__main__":
    # Script entry point: start the app with server-side rendering switched off
    # and periodic_data_fetch (from utils) passed as the app's lifespan handler.
    demo.launch(
        app_kwargs={"lifespan": periodic_data_fetch},
        ssr_mode=False,
    )