abdev-leaderboard

Running

File size: 5,282 Bytes

from pathlib import Path
import tempfile
from typing import BinaryIO
import json

import gradio as gr
from datetime import datetime, timezone
import uuid

from constants import API, SUBMISSIONS_REPO, REGISTRATION_CODE
from validation import validate_csv_file, validate_username


def upload_submission(
    file_content: str,
    user_state,
    submission_type: str,
    model_name: str,
    model_description: str,
    anonymous: bool = False,
):
    """Upload a single submission record to the submissions dataset repo.

    No validation happens here; the caller is expected to have validated
    ``file_content`` already.

    Args:
        file_content: CSV text of the submission, stored verbatim in the record.
        user_state: Identifier of the submitter; used as the top-level folder of
            the uploaded file and stored under the ``"user"`` key.
        submission_type: Dataset label, stored under the ``"dataset"`` key.
        model_name: Model name; stored in the record and embedded in the filename.
        model_description: Free-text description stored in the record.
        anonymous: Stored under the ``"anonymous"`` key of the record.

    Raises:
        Any exception raised by ``API.upload_file`` propagates to the caller.
        The local temporary file is removed whether or not the upload succeeds.
    """
    # Read the clock once so the date in the filename can never disagree with
    # the recorded timestamp (two separate reads could straddle midnight UTC).
    now = datetime.now(timezone.utc)
    timestamp = now.isoformat()
    date = now.date().isoformat()
    submission_id = str(uuid.uuid4())

    # A path separator inside the model name would silently create extra nested
    # folders in the repo path; the unmodified name is still kept in the record.
    safe_model_name = model_name.replace("/", "_").replace("\\", "_")

    # write to dataset
    filename = f"{user_state}/{date}_{safe_model_name}_{submission_id}.json"  # Writing to a bit more structured name
    # Note: This may be represented as Parquet in the future, so the schema needs to stay consistent
    record = {
        "submission_id": submission_id,
        "submission_filename": filename,
        "submission_time": timestamp,
        "evaluated": False,
        "user": user_state,
        "model_name": model_name,
        "model_description": model_description,
        "csv_content": file_content,
        "dataset": submission_type,
        "anonymous": anonymous,
    }

    tmp_name = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False, encoding="utf-8"
        ) as tmp:
            tmp_name = tmp.name
            # Note: No indent because indents and large text contents cause this error: https://github.com/huggingface/datasets/issues/3227
            json.dump(record, tmp)
        # The file is closed (and therefore flushed) before it is uploaded.
        API.upload_file(
            path_or_fileobj=tmp_name,
            path_in_repo=filename,
            repo_id=SUBMISSIONS_REPO,
            repo_type="dataset",
            commit_message=f"Add submission for {user_state} at {timestamp}",
        )
    finally:
        # delete=False means nothing else cleans this up; do it even when the
        # upload fails so failed submissions do not accumulate on disk.
        if tmp_name is not None:
            Path(tmp_name).unlink(missing_ok=True)


def validate_file_requirements(file: BinaryIO, file_type: str) -> Path:
    """Validate basic file requirements and return the upload's path.

    Args:
        file: The uploaded file. Either an object exposing the on-disk location
            through a ``name`` attribute, or the location itself as a ``str`` or
            ``pathlib.Path``.
        file_type: Human-readable label for the file, used in error messages.

    Returns:
        The upload location as a ``Path``. The file is not opened or read here.

    Raises:
        gr.Error: If no file path can be determined, or the path does not end
            in ``.csv`` (compared case-insensitively).
    """
    if isinstance(file, (str, Path)):
        # Must be checked before looking at ``.name``: ``Path.name`` is only the
        # final path component, which would silently drop the directory part.
        file_path = str(file)
    else:
        # ``None`` or any object lacking ``name`` falls through to the error
        # below instead of raising AttributeError.
        file_path = getattr(file, "name", None)

    if not file_path:
        raise gr.Error(
            f"Uploaded {file_type} file object does not have a valid file path."
        )

    path_obj = Path(file_path)
    if path_obj.suffix.lower() != ".csv":
        raise gr.Error(
            f"{file_type} file must be a CSV file. Please upload a .csv file."
        )

    return path_obj


def _read_submission_csv(file: BinaryIO, file_label: str) -> str:
    """Check the upload's basic requirements and return its contents as text."""
    csv_path = validate_file_requirements(file, file_label)
    try:
        return csv_path.read_bytes().decode("utf-8")
    except UnicodeDecodeError as err:
        # Report a bad encoding as a user-facing error rather than a raw traceback.
        raise gr.Error(
            f"{file_label} file could not be decoded as UTF-8. Please re-save it as a UTF-8 encoded CSV file."
        ) from err


def make_submission(
    cv_file: BinaryIO,
    test_file: BinaryIO,
    user_state,
    model_name: str = "",
    model_description: str = "",
    anonymous: bool = False,
    registration_code: str = "",
    # profile: gr.OAuthProfile | None = None,
):
    """
    Make submissions for both GDPa1 cross-validation and private test set files.
    Both files are required. Validates both files before making any submissions.

    Args:
        cv_file: Uploaded GDPa1 cross-validation CSV.
        test_file: Uploaded private test set CSV.
        user_state: Identifier of the submitter; checked with ``validate_username``.
        model_name: Required model name; surrounding whitespace is stripped.
        model_description: Optional description; surrounding whitespace is stripped.
        anonymous: Forwarded unchanged to ``upload_submission``.
        registration_code: Compared case-insensitively, after stripping
            whitespace, against ``REGISTRATION_CODE``.

    Returns:
        One confirmation message per uploaded file, joined by blank lines.

    Raises:
        gr.Error: If the model name is empty, the registration code does not
            match, a file is missing, is not a ``.csv``, or is not UTF-8 text.
            ``validate_username`` and ``validate_csv_file`` may raise as well.
    """
    # if profile:
    #    user_state = profile.name
    validate_username(user_state)

    # Treat a missing value (None) the same as an empty string.
    model_name = (model_name or "").strip()
    model_description = (model_description or "").strip()

    if not model_name:
        raise gr.Error("Please provide a model name.")
    # NOTE(review): the e-mail text in the message below looks like a redacted
    # placeholder -- confirm the intended contact address.
    if str(registration_code).strip().upper() != REGISTRATION_CODE:
        raise gr.Error(
            "Invalid registration code. Please register on the <a href='https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition'>Competition Registration page</a> or email <a href='mailto:[email protected]'>[email protected]</a>."
        )

    if cv_file is None:
        raise gr.Error(
            "Please upload the GDPa1 Cross-Validation CSV file before submitting."
        )
    if test_file is None:
        raise gr.Error("Please upload the Private Test Set CSV file before submitting.")

    # (uploaded file, label for error messages, dataset name, label for the success message)
    uploads = (
        (cv_file, "GDPa1 Cross-Validation", "GDPa1_cross_validation", "Cross-Validation"),
        (test_file, "Private Test Set", "Heldout Test Set", "Test Set"),
    )

    # Validate everything first so nothing is uploaded when either file is invalid.
    validated = []
    for file_obj, file_label, submission_type, display_name in uploads:
        content = _read_submission_csv(file_obj, file_label)
        validate_csv_file(content, submission_type)
        validated.append((submission_type, display_name, content))

    # If validation passes, make submissions for both files
    messages = []
    for submission_type, display_name, content in validated:
        # Upload submission without re-validating (already done)
        upload_submission(
            file_content=content,
            user_state=user_state,
            submission_type=submission_type,
            model_name=model_name,
            model_description=model_description,
            anonymous=anonymous,
        )
        messages.append(
            f"✅ {display_name}: Your submission has been received! Your results should appear on the leaderboard within a minute."
        )

    return "\n\n".join(messages)