from pathlib import Path
import io
import json
import tempfile
from datetime import datetime, timezone
from typing import BinaryIO, Literal

import pandas as pd
import gradio as gr
from datasets import load_dataset
from gradio_leaderboard import Leaderboard

from about import ASSAY_LIST, ASSAY_RENAME, ASSAY_EMOJIS, submissions_repo, API, results_repo
from utils import read_submission_from_hub, write_results


def make_submission(submitted_file: BinaryIO, user_state):
    """Record an uploaded submission in the submissions dataset repo.

    Wraps the raw CSV text in a JSON record (with user, timestamp and an
    ``evaluated`` flag) and uploads it to ``submissions_repo`` on the Hub.

    Args:
        submitted_file: Object returned by the Gradio ``File`` component;
            its ``.name`` attribute is the path of the uploaded temp file.
        user_state: Username string held in the Gradio state, or ``None``
            if the user never entered one.

    Returns:
        A status message to display to the user.

    Raises:
        gr.Error: If no username was provided or the upload has no path.
    """
    if user_state is None:
        raise gr.Error("You must submit your username to submit a file.")

    file_path = submitted_file.name
    if not file_path:
        raise gr.Error("Uploaded file object does not have a valid file path.")

    # Naive UTC timestamp: datetime.utcnow() is deprecated (Python 3.12+),
    # so build the same value from an aware "now" and drop the tzinfo to
    # keep the historical filename/record format unchanged.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    file_content = Path(file_path).read_text(encoding="utf-8")

    # ':' is not legal in repo paths, so sanitize the timestamp.
    filename = f"{user_state}/{timestamp.replace(':', '-')}_{user_state}.json"
    record = {
        "submission_filename": filename,
        "submission_time": timestamp,
        "csv_content": file_content,
        "evaluated": False,
        "user": user_state,
    }

    # Serialize the record to a local temp file that upload_file can read.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
        json.dump(record, tmp, indent=2)
        tmp.flush()
        tmp_name = tmp.name

    try:
        API.upload_file(
            path_or_fileobj=tmp_name,
            path_in_repo=filename,
            repo_id=submissions_repo,
            repo_type="dataset",
            commit_message=f"Add submission for {user_state} at {timestamp}",
        )
    finally:
        # Always remove the local temp file, even if the upload fails.
        Path(tmp_name).unlink(missing_ok=True)

    return "✅ Your submission has been received! Sit tight and your scores will appear on the leaderboard shortly."
def get_leaderboard_table(assay: str | None = None): # ds = load_dataset(results_repo, split='train', download_mode="force_redownload") # full_df = pd.DataFrame(ds) # full_df['full results'] = full_df['result_filename'].apply(lambda x: make_boundary_clickable(x)).astype(str) # full_df.rename(columns={'submission_time': 'submission time', 'problem_type': 'problem type'}, inplace=True) # to_show = full_df.copy(deep=True) # to_show = to_show[to_show['user'] != 'test'] # to_show = to_show[['submission time', 'problem type', 'user', 'score', 'full results']] # to_show['user'] = to_show['user'].apply(lambda x: make_user_clickable(x)).astype(str) # Previously hosted on HF hub, local for now (Can also pull directly from github backend) column_order = ["model", "property", "spearman", "spearman_abs"] # "assay", ds = load_dataset(results_repo, split='no_low_spearman', download_mode="force_redownload") df = pd.DataFrame(ds).drop_duplicates(subset=["model", "assay"]) df["property"] = df["assay"].map(ASSAY_RENAME) df = df.query("assay.isin(@ASSAY_RENAME.keys())") if assay is not None: df = df[df['assay'] == assay] df = df[column_order] return df.sort_values(by="spearman_abs", ascending=False) def get_leaderboard_object(assay: str | None = None): df = get_leaderboard_table(assay=assay) filter_columns = ["model"] if assay is None: filter_columns.append("property") # TODO how to sort filter columns alphabetically? Leaderboard( value=df, datatype=["str", "str", "str", "number"], select_columns=["model", "property", "spearman"], search_columns=["model"], filter_columns=filter_columns, every=60, render=True ) def show_output_box(message): return gr.update(value=message, visible=True) # # def gradio_interface() -> gr.Blocks: with gr.Blocks() as demo: gr.Markdown(""" ## Welcome to the Ginkgo Antibody Developability Benchmark Leaderboard! 
Participants can submit their model to the leaderboard by """) with gr.Tabs(elem_classes="tab-buttons"): with gr.TabItem("🚀 Leaderboard", elem_id="abdev-benchmark-tab-table"): gr.Markdown("# Antibody Developability Benchmark Leaderboard") get_leaderboard_object() # TODO: this is not going to update well, need to fix # gr.Markdown("Extra info here") # Procedurally make these 5 tabs for assay in ASSAY_LIST: with gr.TabItem(f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}", elem_id=f"abdev-benchmark-tab-table"): gr.Markdown(f"# {ASSAY_RENAME[assay]} (measured by {assay})") get_leaderboard_object(assay=assay) with gr.TabItem("❔About", elem_id="abdev-benchmark-tab-table"): gr.Markdown( """ ## About this challenge We're inviting the ML/bio community to predict developability properties for 244 antibodies from the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1). **What is antibody developability?** Antibodies have to be manufacturable, stable in high concentrations, and have low off-target effects. Properties such as these can often hinder the progression of an antibody to the clinic, and are collectively referred to as 'developability'. Here we show 5 of these properties and invite the community to submit and develop better predictors, which will be tested out on a heldout private set to assess model generalization. **How to submit?** TODO **How to evaluate?** TODO """ ) with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"): gr.Markdown( """ # Antibody Developability Submission Upload a CSV to get a score! """ ) filename = gr.State(value=None) eval_state = gr.State(value=None) user_state = gr.State(value=None) # gr.LoginButton() with gr.Row(): with gr.Column(): username_input = gr.Textbox( label="Username", placeholder="Enter your Hugging Face username", info="This will be displayed on the leaderboard." 
) with gr.Column(): boundary_file = gr.File(label="Submission CSV") username_input.change( fn=lambda x: x if x.strip() else None, inputs=username_input, outputs=user_state ) submit_btn = gr.Button("Evaluate") message = gr.Textbox(label="Status", lines=1, visible=False) # help message gr.Markdown("If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space.") submit_btn.click( make_submission, inputs=[boundary_file, user_state], outputs=[message], ).then( fn=show_output_box, inputs=[message], outputs=[message], ) if __name__ == "__main__": demo.launch()