abdev-leaderboard / constants.py
loodvanniekerkginkgo's picture
Double button working (it seems). Just need to adjust language on the
672339b
raw
history blame
2.99 kB
"""
Constants for the Antibody Developability Benchmark
"""
import os
from huggingface_hub import HfApi
# Assay identifiers. The same strings are used as keys of the ASSAY_* dicts
# defined in this module.
ASSAY_LIST: list[str] = [
    "AC-SINS_pH7.4",
    "PR_CHO",
    "HIC",
    "Tm2",
    "Titer",
]
# Human-readable property name for each assay identifier.
# NOTE(review): presumably the label shown in place of the raw assay name
# on the leaderboard -- confirm against the callers.
ASSAY_RENAME: dict[str, str] = {
    "AC-SINS_pH7.4": "Self-association",
    "PR_CHO": "Polyreactivity",
    "HIC": "Hydrophobicity",
    "Tm2": "Thermostability",
    "Titer": "Titer",
}
# One-line description of each assay: the measured property followed by the
# method named in the string itself.
ASSAY_DESCRIPTION: dict[str, str] = {
    "AC-SINS_pH7.4": "Self association by AC-SINS at pH 7.4",
    "PR_CHO": "Polyreactivity by bead-based method against CHO SMP",
    "HIC": "Hydrophobicity by HIC",
    "Tm2": "Thermostability by nanoDSF",
    "Titer": "Titer by Valita",
}
# Emoji associated with each assay identifier.
# The values are written as \U escapes so they stay intact even if this file
# is re-read with the wrong text encoding; the previous literals were UTF-8
# emoji bytes that had been decoded as a single-byte codepage (mojibake).
ASSAY_EMOJIS: dict[str, str] = {
    "AC-SINS_pH7.4": "\U0001F9F2",  # magnet
    "PR_CHO": "\U0001F3AF",  # direct hit (bullseye)
    "HIC": "\U0001F4A7",  # droplet
    # NOTE(review): reconstructed as thermometer + variation selector-16;
    # confirm against the upstream file.
    "Tm2": "\U0001F321\uFE0F",
    "Titer": "\U0001F9EA",  # test tube
}
# Per-assay boolean flag: True for Tm2 and Titer, False for the others.
# NOTE(review): going by the name, True means a larger measured value is the
# desirable direction -- confirm against the code that consumes this.
ASSAY_HIGHER_IS_BETTER: dict[str, bool] = {
    "HIC": False,
    "Tm2": True,
    "Titer": True,
    "PR_CHO": False,
    "AC-SINS_pH7.4": False,
}
# Tabs with emojis
# The emoji prefixes are written as Unicode escapes: the previous literals
# were UTF-8 bytes decoded with a single-byte codepage (mojibake), so the
# labels contained garbage characters instead of the intended symbols.
ABOUT_TAB_NAME = "\U0001F4D6 About / Rules"  # open book
FAQ_TAB_NAME = "\u2753 FAQs"  # question mark ornament
SUBMIT_TAB_NAME = "\u2709\uFE0F Submit"  # envelope + variation selector-16
# Read from the environment at import time; None when the variable is unset.
REGISTRATION_CODE = os.getenv("REGISTRATION_CODE")

# External links. The long terms URL is split across two adjacent literals,
# which Python joins into a single string.
TERMS_URL = (
    "https://euphsfcyogalqiqsawbo.supabase.co/storage/v1/object/public/gdpweb/pdfs/"
    "2025%20Ginkgo%20Antibody%20Developability%20Prediction%20Competition%202025-08-28-v2.pdf"
)
SLACK_URL = (
    "https://join.slack.com/t/bitsinbio/shared_invite/"
    "zt-3dqigle2b-e0dEkfPPzzWL055j_8N_eQ"
)
TUTORIAL_URL = (
    "https://huggingface.co/blog/ginkgo-datapoints/"
    "making-antibody-embeddings-and-predictions"
)
# Input CSV file requirements: column names listed here are the required ones.
REQUIRED_COLUMNS: list[str] = ["antibody_name"]

# Cross validation
# NOTE(review): presumably the name of the column holding the fold
# assignment -- confirm against the code that reads it.
CV_COLUMN = "hierarchical_cluster_IgG_isotype_stratified_fold"
# Sequence files: maps a dataset label to the relative path of its sequences
# CSV. "GDPa1" and "GDPa1_cross_validation" point at the same file.
SEQUENCES_FILE_DICT: dict[str, str] = {
    "GDPa1": "data/GDPa1_v1.2_sequences.csv",
    "GDPa1_cross_validation": "data/GDPa1_v1.2_sequences.csv",
    "Heldout Test Set": "data/heldout-set-sequences.csv",
}

# GDPa1 dataset: hf:// location of the dataset CSV.
GDPa1_path = (
    "hf://datasets/ginkgo-datapoints/GDPa1/"
    "GDPa1_v1.2_20250814.csv"
)
# Huggingface API
# Both values are read from the environment once, at import time.
TOKEN = os.getenv("HF_TOKEN")  # None when HF_TOKEN is unset
CACHE_PATH = os.environ.get("HF_HOME", ".")  # falls back to "." when HF_HOME is unset
# Module-level HfApi client built with TOKEN; TOKEN is None when HF_TOKEN is
# not set in the environment.
API = HfApi(token=TOKEN)
# Huggingface repos: both repo ids are "<ORGANIZATION>/<repo name>".
ORGANIZATION = "ginkgo-datapoints"
SUBMISSIONS_REPO = ORGANIZATION + "/abdev-bench-submissions"
RESULTS_REPO = ORGANIZATION + "/abdev-bench-results"
# Leaderboard dataframes
# The columns expected from the results dataset.
LEADERBOARD_RESULTS_COLUMNS = [
    "user",
    "assay",
    "spearman",
    "dataset",
    "model",
    "submission_time",
]
# The same columns, in the same order, after changing "assay" to "property"
# (pretty formatting).
LEADERBOARD_DISPLAY_COLUMNS = [
    "property" if column == "assay" else column
    for column in LEADERBOARD_RESULTS_COLUMNS
]
# Display header for each leaderboard column name.
LEADERBOARD_COLUMNS_RENAME = {
    "spearman": "Spearman Correlation",
    "dataset": "Dataset",
    "user": "User",
    "submission_time": "Submission Time",
    "model": "Model Name",
    "property": "Property",
}
# NOTE(review): going by the name, usernames whose submissions count as
# baselines -- confirm against the callers.
BASELINE_USERNAMES = ["loodvanniekerkginkgo"]


def LEADERBOARD_COLUMNS_RENAME_LIST(columns: list[str]) -> list[str]:
    """Return ``columns`` mapped through ``LEADERBOARD_COLUMNS_RENAME``.

    Args:
        columns: Column names to translate.

    Returns:
        A new list of the same length and order. Each name that has an entry
        in ``LEADERBOARD_COLUMNS_RENAME`` is replaced by its display header;
        any other name is passed through unchanged.
    """
    return [LEADERBOARD_COLUMNS_RENAME.get(name, name) for name in columns]