import time

import pandas as pd
import gradio as gr
from gradio.themes.utils import sizes
from gradio_leaderboard import Leaderboard
from dotenv import load_dotenv

load_dotenv()  # Load environment variables from .env file (before imports)

from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INSTRUCTIONS, WEBSITE_HEADER
from constants import (
    ASSAY_RENAME,  # noqa: F401
    SEQUENCES_FILE_DICT,
    LEADERBOARD_DISPLAY_COLUMNS,
    ABOUT_TAB_NAME,
    FAQ_TAB_NAME,
    TERMS_URL,
    LEADERBOARD_COLUMNS_RENAME,
    LEADERBOARD_COLUMNS_RENAME_LIST,
    SUBMIT_TAB_NAME,
    SLACK_URL,
)
from submit import make_submission
from utils import fetch_hf_results, show_output_box, periodic_data_fetch

# Local cache of leaderboard results. Written by the background fetch started in
# utils (fetch_hf_results / periodic_data_fetch) and re-read on every refresh.
# Hoisted to a constant so the filename is defined in exactly one place.
RESULTS_CSV = "debug-current-results.csv"


def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None) -> pd.DataFrame:
    """
    Format the dataframe for display on the leaderboard.

    The dataframe comes from utils.fetch_hf_results().

    Args:
        df_results: Raw results with at least the columns in
            LEADERBOARD_DISPLAY_COLUMNS plus an "assay" column.
        assay: If given, keep only rows for that assay; otherwise keep all
            assays known to ASSAY_RENAME.

    Returns:
        A display-ready copy, sorted by Spearman (descending) and with columns
        renamed via LEADERBOARD_COLUMNS_RENAME.
    """
    # Keep only assays we know how to display (keys of ASSAY_RENAME).
    df = df_results.query("assay.isin(@ASSAY_RENAME.keys())").copy()
    if assay is not None:
        df = df[df["assay"] == assay]
    df = df[LEADERBOARD_DISPLAY_COLUMNS]
    df = df.sort_values(by="spearman", ascending=False)
    # After sorting, just add the reason for excluding heldout test set
    # Note: We can also just say the following as a text box at the bottom of the leaderboard: "Note: Results for the Heldout Test Set are only evaluated at competition close"
    # Convert spearman column to string to avoid dtype incompatibility when assigning text
    df["spearman"] = df["spearman"].astype(str)
    df.loc[
        (df["dataset"] == "Heldout Test Set") & (df["spearman"] == "nan"), "spearman"
    ] = "N/A, evaluated at competition close"
    # Finally, rename columns for readability
    df = df.rename(columns=LEADERBOARD_COLUMNS_RENAME)
    return df


def get_leaderboard_object(assay: str | None = None) -> Leaderboard:
    """
    Build a Leaderboard component from the cached results CSV.

    Args:
        assay: Optional assay to restrict the leaderboard to. When None, the
            overall leaderboard is built and a "property" filter is exposed.

    Returns:
        A rendered gradio_leaderboard.Leaderboard instance.
    """
    filter_columns = ["dataset"]
    if assay is None:
        filter_columns.append("property")
    # Bug: Can't leave search_columns empty because then it says "Column None not found in headers"
    # Note(Lood): Would be nice to make it clear that the Search Column is searching on model name
    current_dataframe = pd.read_csv(RESULTS_CSV)
    lb = Leaderboard(
        value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
        datatype=["str", "str", "str", "number", "str"],
        select_columns=LEADERBOARD_COLUMNS_RENAME_LIST(
            ["model", "property", "spearman", "dataset", "user"]
        ),
        search_columns=["Model Name"],
        filter_columns=LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns),
        every=15,
        render=True,
    )
    return lb


def refresh_overall_leaderboard() -> pd.DataFrame:
    """Re-read the results CSV (updated by the outer thread) and reformat it."""
    current_dataframe = pd.read_csv(RESULTS_CSV)
    return format_leaderboard_table(df_results=current_dataframe)


# Initialize global dataframe
fetch_hf_results()
time.sleep(2)  # Give the outer thread time to create the file at the start
current_dataframe = pd.read_csv(RESULTS_CSV)

# Make font size bigger using gradio theme
with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
    timer = gr.Timer(3)  # Run every 3 seconds when page is focused

    ## Header
    with gr.Row():
        with gr.Column(scale=6):  # bigger text area
            gr.Markdown(WEBSITE_HEADER)
        with gr.Column(scale=2):  # smaller side column for logo
            gr.Image(
                value="./assets/competition_logo.jpg",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                show_fullscreen_button=False,
                width="25vw",  # Take up the width of the column (2/8 = 1/4)
            )

    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.TabItem(ABOUT_TAB_NAME, elem_id="abdev-benchmark-tab-table"):
            gr.Markdown(ABOUT_INTRO)
            gr.Image(
                value="./assets/prediction_explainer_cv.png",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                show_fullscreen_button=False,
                width="30vw",
            )
            gr.Markdown(ABOUT_TEXT)

            # Sequence download buttons
            gr.Markdown(
                """### 📥 Download Sequences
The GDPa1 dataset (with assay data and sequences) is available on Hugging Face [here](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1), but we provide this and the private test set for convenience."""
            )
with gr.Row(): with gr.Column(): download_button_cv_about = gr.DownloadButton( label="📥 Download GDPa1 sequences", value=SEQUENCES_FILE_DICT["GDPa1_cross_validation"], variant="secondary", ) with gr.Column(): download_button_test_about = gr.DownloadButton( label="📥 Download Private Test Set sequences", value=SEQUENCES_FILE_DICT["Heldout Test Set"], variant="secondary", ) with gr.TabItem( "🏆 Leaderboard", elem_id="abdev-benchmark-tab-table" ) as leaderboard_tab: gr.Markdown( """ # Overall Leaderboard (filter below by property) Each property has its own prize, and participants can submit models for any combination of properties. **Note**: It is *easy to overfit* the public GDPa1 dataset, which results in artificially high Spearman correlations. We would suggest training using cross-validation to give a better indication of the model's performance on the eventual private test set. """ ) lb = get_leaderboard_object() timer.tick(fn=refresh_overall_leaderboard, outputs=lb) demo.load(fn=refresh_overall_leaderboard, outputs=lb) with gr.TabItem(SUBMIT_TAB_NAME, elem_id="boundary-benchmark-tab-table"): gr.Markdown(SUBMIT_INSTRUCTIONS) with gr.Row(): with gr.Column(): username_input = gr.Textbox( label="Username", placeholder="Enter your Hugging Face username", info="This will be used to identify valid submissions, and to update your results if you submit again.", ) anonymous_checkbox = gr.Checkbox( label="Anonymous", value=False, info="If checked, your username will be replaced with an anonymous hash on the leaderboard.", ) model_name_input = gr.Textbox( label="Model Name", placeholder="Enter your model name (e.g., 'MyProteinLM-v1')", info="This will be displayed on the leaderboard.", ) model_description_input = gr.Textbox( label="Model Description (optional)", placeholder="Brief description of your model and approach", info="Describe your model, training data, or methodology.", lines=3, ) registration_code = gr.Textbox( label="Registration Code", placeholder="Enter your 
registration code", info="If you did not receive a registration code, please sign up on the Competition Registration page or email antibodycompetition@ginkgobioworks.com.", ) with gr.Column(): gr.Markdown("### Upload Both Submission Files") # GDPa1 Cross-validation file gr.Markdown("**GDPa1 Cross-Validation Predictions:**") download_button_cv = gr.DownloadButton( label="📥 Download GDPa1 sequences", value=SEQUENCES_FILE_DICT["GDPa1_cross_validation"], variant="secondary", ) submission_file_cv = gr.File(label="GDPa1 Cross-Validation CSV") # Test set file gr.Markdown("**Private Test Set Predictions:**") download_button_test = gr.DownloadButton( label="📥 Download Private Test Set sequences", value=SEQUENCES_FILE_DICT["Heldout Test Set"], variant="secondary", ) submission_file_test = gr.File(label="Private Test Set CSV") submit_btn = gr.Button("Evaluate") message = gr.Textbox(label="Status", lines=3, visible=False) submit_btn.click( make_submission, inputs=[ submission_file_cv, submission_file_test, username_input, model_name_input, model_description_input, anonymous_checkbox, registration_code, ], outputs=[message], ).then( fn=show_output_box, inputs=[message], outputs=[message], ) with gr.Tab(FAQ_TAB_NAME): gr.Markdown("# Frequently Asked Questions") for i, (question, answer) in enumerate(FAQS.items()): # Would love to make questions bold but accordion doesn't support it question = f"{i+1}. {question}" with gr.Accordion(question, open=False): gr.Markdown(f"*{answer}*") # Italics for answers # Footnote gr.Markdown( f"""
📬 For questions or feedback, contact antibodycompetition@ginkgobioworks.com or discuss on the Slack community co-hosted by Bits in Bio.
Visit the Competition Registration page to sign up for updates and to register, and see Terms here.
""", elem_id="contact-footer", ) if __name__ == "__main__": demo.launch(ssr_mode=False, app_kwargs={"lifespan": periodic_data_fetch})