from pathlib import Path
from datetime import datetime, timezone
import json
import tempfile
from typing import BinaryIO

import pandas as pd
import gradio as gr
from gradio_leaderboard import Leaderboard
from datasets import load_dataset

from about import ASSAY_LIST, ASSAY_RENAME, ASSAY_EMOJIS, submissions_repo, API, results_repo


def make_submission(
submitted_file: BinaryIO,
user_state):
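    """Store a user's uploaded submission in the submissions dataset on the Hub.

    The raw file content and metadata are wrapped in a JSON record with
    `evaluated=False`; a scoring backend is expected to pick the record up later.
    """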
if user_state is None:
raise gr.Error("You must submit your username to submit a file.")
file_path = submitted_file.name
if not file_path:
raise gr.Error("Uploaded file object does not have a valid file path.")
path_obj = Path(file_path)
    timestamp = datetime.now(timezone.utc).isoformat()
    with path_obj.open("rb") as f_in:
        file_content = f_in.read().decode("utf-8")
# write to dataset
filename = f"{user_state}/{timestamp.replace(':', '-')}_{user_state}.json"
record = {
"submission_filename": filename,
"submission_time": timestamp,
"csv_content": file_content,
"evaluated": False,
"user": user_state,
}
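    # Serialize the record to a temporary file so it can be uploaded by path;
    # delete=False keeps the file alive past the context manager until the upload finishes.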
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
json.dump(record, tmp, indent=2)
tmp.flush()
tmp_name = tmp.name
API.upload_file(
path_or_fileobj=tmp_name,
path_in_repo=filename,
repo_id=submissions_repo,
repo_type="dataset",
commit_message=f"Add submission for {user_state} at {timestamp}"
)
Path(tmp_name).unlink()
return "✅ Your submission has been received! Sit tight and your scores will appear on the leaderboard shortly."
def get_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
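    """Build the leaderboard dataframe, optionally filtered to a single assay."""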
# ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
# full_df = pd.DataFrame(ds)
# full_df['full results'] = full_df['result_filename'].apply(lambda x: make_boundary_clickable(x)).astype(str)
# full_df.rename(columns={'submission_time': 'submission time', 'problem_type': 'problem type'}, inplace=True)
# to_show = full_df.copy(deep=True)
# to_show = to_show[to_show['user'] != 'test']
# to_show = to_show[['submission time', 'problem type', 'user', 'score', 'full results']]
# to_show['user'] = to_show['user'].apply(lambda x: make_user_clickable(x)).astype(str)
# Previously hosted on HF hub, local for now (Can also pull directly from github backend)
    column_order = ["model", "property", "spearman", "spearman_abs"]
    # Keep only assays that have a display name defined.
    df = df_results[df_results["assay"].isin(ASSAY_RENAME)].copy()
    if assay is not None:
        df = df[df["assay"] == assay]
    df = df[column_order]
    return df.sort_values(by="spearman_abs", ascending=False)
def get_leaderboard_object(df_results: pd.DataFrame, assay: str | None = None):
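    """Render a Leaderboard component for the results, per-assay or overall."""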
df = get_leaderboard_table(df_results=df_results, assay=assay)
filter_columns = ["model"]
if assay is None:
filter_columns.append("property")
# TODO how to sort filter columns alphabetically?
Leaderboard(
value=df,
        datatype=["str", "str", "number", "number"],
select_columns=["model", "property", "spearman"],
search_columns=["model"],
filter_columns=filter_columns,
every=60,
render=True
)
def show_output_box(message):
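    """Reveal the hidden status textbox and fill it with `message`."""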
return gr.update(value=message, visible=True)
def fetch_hf_results():
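    """Download the latest results from the Hub (bypassing the local cache)
    and keep one row per (model, assay) pair."""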
ds = load_dataset(results_repo, split='no_low_spearman', download_mode="force_redownload")
df = pd.DataFrame(ds).drop_duplicates(subset=["model", "assay"])
df["property"] = df["assay"].map(ASSAY_RENAME)
return df
with gr.Blocks() as demo:
gr.Markdown("""
## Welcome to the Ginkgo Antibody Developability Benchmark Leaderboard!
Participants can submit their model to the leaderboard by
""")
df = fetch_hf_results()
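    # Results are loaded once at app startup (see the refresh TODO on the Overall tab).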
with gr.Tabs(elem_classes="tab-buttons"):
        # Procedurally create one leaderboard tab per assay
for assay in ASSAY_LIST:
with gr.TabItem(f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}", elem_id=f"abdev-benchmark-tab-table"):
gr.Markdown(f"# {ASSAY_RENAME[assay]} (measured by {assay})")
get_leaderboard_object(df_results=df, assay=assay)
with gr.TabItem("🚀 Overall", elem_id="abdev-benchmark-tab-table"):
gr.Markdown("# Antibody Developability Benchmark Leaderboard over all properties")
get_leaderboard_object(df_results=df)
# TODO: this is not going to update well, need to fix
with gr.TabItem("❔About", elem_id="abdev-benchmark-tab-table"):
gr.Markdown(
"""
            ## About this challenge
            We're inviting the ML/bio community to predict developability properties for 244 antibodies from the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1).
            **What is antibody developability?**
            Antibodies have to be manufacturable, stable at high concentrations, and low in off-target effects.
            Shortcomings in properties like these often hinder an antibody's progression to the clinic; collectively, these properties are referred to as 'developability'.
            Here we show 5 of these properties and invite the community to develop and submit better predictors, which will be evaluated on a held-out private set to assess model generalization.
**How to submit?**
TODO
**How to evaluate?**
TODO
"""
)
with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):
gr.Markdown(
"""
# Antibody Developability Submission
Upload a CSV to get a score!
"""
)
            user_state = gr.State(value=None)
# gr.LoginButton()
with gr.Row():
with gr.Column():
username_input = gr.Textbox(
label="Username",
placeholder="Enter your Hugging Face username",
info="This will be displayed on the leaderboard."
)
with gr.Column():
                    submission_file = gr.File(label="Submission CSV")
            username_input.change(
                fn=lambda x: (x or "").strip() or None,
                inputs=username_input,
                outputs=user_state
            )
submit_btn = gr.Button("Evaluate")
message = gr.Textbox(label="Status", lines=1, visible=False)
# help message
gr.Markdown("If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space.")
            submit_btn.click(
                make_submission,
                inputs=[submission_file, user_state],
                outputs=[message],
).then(
fn=show_output_box,
inputs=[message],
outputs=[message],
)
if __name__ == "__main__":
demo.launch()