|
|
import time |
|
|
|
|
|
import pandas as pd |
|
|
import gradio as gr |
|
|
from gradio.themes.utils import sizes |
|
|
from gradio_leaderboard import Leaderboard |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
load_dotenv() |
|
|
|
|
|
from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INSTRUCTIONS, WEBSITE_HEADER |
|
|
from constants import ( |
|
|
ASSAY_RENAME, |
|
|
SEQUENCES_FILE_DICT, |
|
|
LEADERBOARD_DISPLAY_COLUMNS, |
|
|
ABOUT_TAB_NAME, |
|
|
FAQ_TAB_NAME, |
|
|
TERMS_URL, |
|
|
LEADERBOARD_COLUMNS_RENAME, |
|
|
LEADERBOARD_COLUMNS_RENAME_LIST, |
|
|
SUBMIT_TAB_NAME, |
|
|
SLACK_URL, |
|
|
) |
|
|
from submit import make_submission |
|
|
from utils import fetch_hf_results, show_output_box, periodic_data_fetch |
|
|
|
|
|
|
|
|
def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
    """
    Turn a raw results frame into the table shown on the leaderboard.

    Rows are kept only when their "assay" is a key of ASSAY_RENAME, optionally
    narrowed to one assay, reduced to LEADERBOARD_DISPLAY_COLUMNS and ordered
    by descending "spearman". The score column is then converted to text so
    that missing held-out scores can carry an explanatory message, and the
    columns are renamed through LEADERBOARD_COLUMNS_RENAME.

    Args:
        df_results: Results frame. It must contain an "assay" column and every
            column named in LEADERBOARD_DISPLAY_COLUMNS (which in turn has to
            include "spearman" and "dataset", since both are used below).
        assay: When given, only rows whose "assay" equals this value are kept.

    Returns:
        A new DataFrame; ``df_results`` itself is left untouched.
    """
    # Work on a copy so the caller's frame is never mutated.
    table = df_results.query("assay.isin(@ASSAY_RENAME.keys())").copy()

    if assay is not None:
        table = table[table["assay"] == assay]

    table = table[LEADERBOARD_DISPLAY_COLUMNS].sort_values(
        by="spearman", ascending=False
    )

    # Sorting happens while the scores are still numeric; only afterwards is
    # the column stringified so it can hold a text placeholder.
    table["spearman"] = table["spearman"].astype(str)
    score_missing = table["spearman"] == "nan"
    is_heldout = table["dataset"] == "Heldout Test Set"
    table.loc[is_heldout & score_missing, "spearman"] = (
        "N/A, evaluated at competition close"
    )

    return table.rename(columns=LEADERBOARD_COLUMNS_RENAME)
|
|
|
|
|
|
|
|
def get_leaderboard_object(assay: str | None = None):
    """
    Build the Leaderboard component from the results CSV on disk.

    Args:
        assay: Optional assay to restrict the table to. When omitted, the
            "property" column is offered as a filter in addition to "dataset".

    Returns:
        The constructed ``Leaderboard`` component.
    """
    # Only the overall view (no assay selected) gets the "property" filter.
    filter_columns = ["dataset", "property"] if assay is None else ["dataset"]

    results = pd.read_csv("debug-current-results.csv")
    table = format_leaderboard_table(df_results=results, assay=assay)

    return Leaderboard(
        value=table,
        datatype=["str", "str", "str", "number", "str"],
        select_columns=LEADERBOARD_COLUMNS_RENAME_LIST(
            ["model", "property", "spearman", "dataset", "user"]
        ),
        search_columns=["Model Name"],
        filter_columns=LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns),
        every=15,
        render=True,
    )
|
|
|
|
|
|
|
|
def refresh_overall_leaderboard():
    """
    Re-read the results CSV and return the formatted overall leaderboard table.

    Returns:
        The DataFrame produced by ``format_leaderboard_table`` with no assay
        filter applied.
    """
    return format_leaderboard_table(
        df_results=pd.read_csv("debug-current-results.csv")
    )
|
|
|
|
|
|
|
|
|
|
|
# Startup: pull results once before the UI is built, then load the CSV.
# NOTE(review): fetch_hf_results() presumably writes debug-current-results.csv,
# and the pause gives that write time to settle -- confirm in utils.
fetch_hf_results()
time.sleep(2)
current_dataframe = pd.read_csv("debug-current-results.csv")
|
|
|
|
|
|
|
|
# Top-level UI definition: header, the About / Leaderboard / Submit / FAQ tabs
# and a contact footer.
# NOTE(review): the nesting below was reconstructed from statement order
# because the source had lost its indentation -- confirm the layout.
# NOTE(review): several labels start with characters that look like a
# mis-decoded emoji, and the contact links show a redaction placeholder
# instead of an address; both are reproduced as found -- confirm against the
# deployed app and the file's encoding.
with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
    # Timer constructed with 3; its tick refreshes the overall leaderboard
    # (wired up inside the Leaderboard tab).
    timer = gr.Timer(3)

    # Header row: a wide text column followed by a narrower logo column.
    with gr.Row():
        with gr.Column(scale=6):
            gr.Markdown(WEBSITE_HEADER)
        with gr.Column(scale=2):
            gr.Image(
                value="./assets/competition_logo.jpg",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                show_fullscreen_button=False,
                width="25vw",
            )

    with gr.Tabs(elem_classes="tab-buttons"):
        # --- About tab ---------------------------------------------------
        with gr.TabItem(ABOUT_TAB_NAME, elem_id="abdev-benchmark-tab-table"):
            gr.Markdown(ABOUT_INTRO)
            gr.Image(
                value="./assets/prediction_explainer_cv.png",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                show_fullscreen_button=False,
                width="30vw",
            )
            gr.Markdown(ABOUT_TEXT)

            # Sequence downloads, repeated on the Submit tab for convenience.
            gr.Markdown(
                """### π₯ Download Sequences
The GDPa1 dataset (with assay data and sequences) is available on Hugging Face [here](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1),
but we provide this and the private test set for convenience."""
            )
            with gr.Row():
                with gr.Column():
                    download_button_cv_about = gr.DownloadButton(
                        label="π₯ Download GDPa1 sequences",
                        value=SEQUENCES_FILE_DICT["GDPa1_cross_validation"],
                        variant="secondary",
                    )
                with gr.Column():
                    download_button_test_about = gr.DownloadButton(
                        label="π₯ Download Private Test Set sequences",
                        value=SEQUENCES_FILE_DICT["Heldout Test Set"],
                        variant="secondary",
                    )

        # --- Leaderboard tab ---------------------------------------------
        # NOTE(review): this tab reuses the About tab's elem_id, which yields
        # duplicate DOM ids -- left unchanged in case external CSS targets it.
        with gr.TabItem(
            "π Leaderboard", elem_id="abdev-benchmark-tab-table"
        ) as leaderboard_tab:
            gr.Markdown(
                """
# Overall Leaderboard (filter below by property)
Each property has its own prize, and participants can submit models for any combination of properties.

**Note**: It is *easy to overfit* the public GDPa1 dataset, which results in artificially high Spearman correlations.
We would suggest training using cross-validation to give a better indication of the model's performance on the eventual private test set.
"""
            )
            lb = get_leaderboard_object()
            # Re-render the table on every timer tick and on page load.
            timer.tick(fn=refresh_overall_leaderboard, outputs=lb)
            demo.load(fn=refresh_overall_leaderboard, outputs=lb)

        # --- Submit tab --------------------------------------------------
        with gr.TabItem(SUBMIT_TAB_NAME, elem_id="boundary-benchmark-tab-table"):
            gr.Markdown(SUBMIT_INSTRUCTIONS)

            with gr.Row():
                # First column: who is submitting and what the model is.
                with gr.Column():
                    username_input = gr.Textbox(
                        label="Username",
                        placeholder="Enter your Hugging Face username",
                        info="This will be used to identify valid submissions, and to update your results if you submit again.",
                    )

                    anonymous_checkbox = gr.Checkbox(
                        label="Anonymous",
                        value=False,
                        info="If checked, your username will be replaced with an anonymous hash on the leaderboard.",
                    )
                    model_name_input = gr.Textbox(
                        label="Model Name",
                        placeholder="Enter your model name (e.g., 'MyProteinLM-v1')",
                        info="This will be displayed on the leaderboard.",
                    )
                    model_description_input = gr.Textbox(
                        label="Model Description (optional)",
                        placeholder="Brief description of your model and approach",
                        info="Describe your model, training data, or methodology.",
                        lines=3,
                    )
                    registration_code = gr.Textbox(
                        label="Registration Code",
                        placeholder="Enter your registration code",
                        info="If you did not receive a registration code, please sign up on the <a href='https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition'>Competition Registration page</a> or email <a href='mailto:[email protected]'>[email protected]</a>.",
                    )

                # Second column: the two prediction files to upload.
                with gr.Column():
                    gr.Markdown("### Upload Both Submission Files")

                    gr.Markdown("**GDPa1 Cross-Validation Predictions:**")
                    download_button_cv = gr.DownloadButton(
                        label="π₯ Download GDPa1 sequences",
                        value=SEQUENCES_FILE_DICT["GDPa1_cross_validation"],
                        variant="secondary",
                    )
                    submission_file_cv = gr.File(label="GDPa1 Cross-Validation CSV")

                    gr.Markdown("**Private Test Set Predictions:**")
                    download_button_test = gr.DownloadButton(
                        label="π₯ Download Private Test Set sequences",
                        value=SEQUENCES_FILE_DICT["Heldout Test Set"],
                        variant="secondary",
                    )
                    submission_file_test = gr.File(label="Private Test Set CSV")

            submit_btn = gr.Button("Evaluate")
            # The status box starts hidden; show_output_box runs after
            # make_submission with the message as both input and output
            # (presumably to reveal it -- confirm in utils).
            message = gr.Textbox(label="Status", lines=3, visible=False)

            submit_btn.click(
                make_submission,
                inputs=[
                    submission_file_cv,
                    submission_file_test,
                    username_input,
                    model_name_input,
                    model_description_input,
                    anonymous_checkbox,
                    registration_code,
                ],
                outputs=[message],
            ).then(
                fn=show_output_box,
                inputs=[message],
                outputs=[message],
            )

        # --- FAQ tab -----------------------------------------------------
        with gr.Tab(FAQ_TAB_NAME):
            gr.Markdown("# Frequently Asked Questions")
            # One collapsed accordion per FAQ entry, numbered from 1.
            for number, (question, answer) in enumerate(FAQS.items(), start=1):
                with gr.Accordion(f"{number}. {question}", open=False):
                    if isinstance(answer, list):
                        # Italicise each item. A Markdown hard line break
                        # needs two trailing spaces before the newline;
                        # with only one the items would run together.
                        gr.Markdown("  \n".join(f"*{item}*" for item in answer))
                    else:
                        gr.Markdown(f"*{answer}*")

    # Footer with contact, registration and terms links.
    gr.Markdown(
        f"""
<div style="text-align: center; font-size: 14px; color: gray; margin-top: 2em;">
π¬ For questions or feedback, contact <a href="mailto:[email protected]">[email protected]</a> or discuss on the <a href="{SLACK_URL}">Slack community</a> co-hosted by Bits in Bio.<br>
Visit the <a href="https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition">Competition Registration page</a> to sign up for updates and to register, and see Terms <a href="{TERMS_URL}">here</a>.
</div>
""",
        elem_id="contact-footer",
    )
|
|
|
|
|
if __name__ == "__main__":
    # Launch only when run as a script. periodic_data_fetch is handed to the
    # underlying app as its lifespan handler via app_kwargs.
    demo.launch(
        ssr_mode=False,
        app_kwargs={"lifespan": periodic_data_fetch},
    )
|
|
|