File size: 5,282 Bytes
63bdadc
 
3edbc93
63bdadc
 
 
6921c9e
9021dc4
63bdadc
471531b
e0516fa
3edbc93
50e75cf
10e69e7
672339b
b2a1e67
10e69e7
 
 
1de74c6
b2a1e67
672339b
6921c9e
 
813ce52
63bdadc
672339b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e059a0b
 
672339b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63bdadc
672339b
 
 
 
 
 
 
 
63bdadc
2dafeb1
10e69e7
672339b
 
10e69e7
 
 
1de74c6
471531b
6c1a62b
10e69e7
672339b
 
 
 
6c1a62b
 
10e69e7
 
 
 
 
 
 
 
62b6599
471531b
2dafeb1
 
 
10e69e7
672339b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10e69e7
672339b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
from pathlib import Path
import tempfile
from typing import BinaryIO
import json

import gradio as gr
from datetime import datetime, timezone
import uuid

from constants import API, SUBMISSIONS_REPO, REGISTRATION_CODE
from validation import validate_csv_file, validate_username


def upload_submission(
    file_content: str,
    user_state,
    submission_type: str,
    model_name: str,
    model_description: str,
    anonymous: bool = False,
):
    """Upload one submission record to the submissions dataset repo.

    No validation is performed here; callers are expected to have validated
    the CSV content and the username already.

    Args:
        file_content: Full CSV text of the submission; stored verbatim in the
            record under ``csv_content``.
        user_state: Identifier of the submitting user; used as the top-level
            folder of the uploaded file and stored under ``user``.
        submission_type: Dataset label stored under ``dataset``.
        model_name: Model name; stored verbatim in the record and used
            (with path separators replaced) in the uploaded filename.
        model_description: Free-text description stored in the record.
        anonymous: Flag stored under ``anonymous``.

    Raises:
        Whatever ``API.upload_file`` raises; the temporary file is removed
        in that case as well.
    """
    # Take a single clock reading so the filename date and the recorded
    # timestamp can never disagree across a UTC midnight boundary.
    now = datetime.now(timezone.utc)
    timestamp = now.isoformat()
    date = now.date().isoformat()
    submission_id = str(uuid.uuid4())

    # model_name is user-supplied: keep path separators out of the repo path
    # so a name like "a/b" cannot create unintended nested folders. The
    # original, unmodified name is still stored in the record below.
    safe_model_name = model_name.replace("/", "_").replace("\\", "_")

    # write to dataset
    filename = f"{user_state}/{date}_{safe_model_name}_{submission_id}.json"  # Writing to a bit more structured name
    # Note: This may be represented as Parquet in the future, so the schema needs to stay consistent
    record = {
        "submission_id": submission_id,
        "submission_filename": filename,
        "submission_time": timestamp,
        "evaluated": False,
        "user": user_state,
        "model_name": model_name,
        "model_description": model_description,
        "csv_content": file_content,
        "dataset": submission_type,
        "anonymous": anonymous,
    }

    # delete=False because the file is handed to the uploader by path after
    # it has been closed; cleanup is therefore done explicitly in `finally`.
    tmp = tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False, encoding="utf-8"
    )
    tmp_path = Path(tmp.name)
    try:
        with tmp:
            # Note: No indent because indents and large text contents cause this error: https://github.com/huggingface/datasets/issues/3227
            json.dump(record, tmp)

        API.upload_file(
            path_or_fileobj=str(tmp_path),
            path_in_repo=filename,
            repo_id=SUBMISSIONS_REPO,
            repo_type="dataset",
            commit_message=f"Add submission for {user_state} at {timestamp}",
        )
    finally:
        # Always remove the temp file, even when serialization or upload fails.
        tmp_path.unlink()


def validate_file_requirements(file: BinaryIO, file_type: str) -> Path:
    """Check that an uploaded file has a path with a ``.csv`` extension.

    Args:
        file: Uploaded file object; only its ``name`` attribute is read.
        file_type: Human-readable label used in the error messages.

    Returns:
        The uploaded file's path as a ``Path``.

    Raises:
        gr.Error: If ``file.name`` is empty, or if its suffix (compared
            case-insensitively) is not ``.csv``.
    """
    uploaded_name = file.name

    if uploaded_name:
        candidate = Path(uploaded_name)
        # Extension check is case-insensitive, so ".CSV" is accepted too.
        if candidate.suffix.lower() == ".csv":
            return candidate
        raise gr.Error(
            f"{file_type} file must be a CSV file. Please upload a .csv file."
        )

    raise gr.Error(
        f"Uploaded {file_type} file object does not have a valid file path."
    )


def _read_validated_csv(file: BinaryIO, upload_label: str, submission_type: str) -> str:
    """Read an uploaded CSV as UTF-8 text and run the CSV validation on it."""
    csv_path = validate_file_requirements(file, upload_label)
    try:
        content = csv_path.read_bytes().decode("utf-8")
    except UnicodeDecodeError as err:
        # Surface a readable message in the UI instead of a raw decode error.
        raise gr.Error(
            f"{upload_label} file is not valid UTF-8 text. Please re-save it as a UTF-8 encoded CSV."
        ) from err
    validate_csv_file(content, submission_type)
    return content


def make_submission(
    cv_file: BinaryIO,
    test_file: BinaryIO,
    user_state,
    model_name: str = "",
    model_description: str = "",
    anonymous: bool = False,
    registration_code: str = "",
    # profile: gr.OAuthProfile | None = None,
):
    """
    Make submissions for both GDPa1 cross-validation and private test set files.
    Both files are required. Validates both files before making any submissions.

    Args:
        cv_file: Uploaded GDPa1 cross-validation CSV file object.
        test_file: Uploaded private test set CSV file object.
        user_state: Identifier of the submitting user; checked with
            ``validate_username`` and passed on to ``upload_submission``.
        model_name: Required model name; surrounding whitespace is stripped.
        model_description: Optional description; surrounding whitespace is
            stripped.
        anonymous: Flag forwarded to ``upload_submission``.
        registration_code: Code compared case-insensitively (after stripping
            whitespace) against ``REGISTRATION_CODE``.

    Returns:
        One confirmation message per uploaded file, separated by blank lines.

    Raises:
        gr.Error: If the model name is empty, the registration code does not
            match, either file is missing, a file is not a ``.csv`` or is not
            valid UTF-8. ``validate_username`` and ``validate_csv_file`` may
            raise their own errors.
    """
    # if profile:
    #    user_state = profile.name
    validate_username(user_state)

    # Tolerate None from the UI as well as empty strings.
    model_name = (model_name or "").strip()
    model_description = (model_description or "").strip()

    if not model_name:
        raise gr.Error("Please provide a model name.")
    if str(registration_code).strip().upper() != REGISTRATION_CODE:
        # NOTE(review): the mailto address below looks like an obfuscation
        # placeholder rather than a real address - confirm against the
        # deployed text.
        raise gr.Error(
            "Invalid registration code. Please register on the <a href='https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition'>Competition Registration page</a> or email <a href='mailto:[email protected]'>[email protected]</a>."
        )

    if cv_file is None:
        raise gr.Error(
            "Please upload the GDPa1 Cross-Validation CSV file before submitting."
        )
    if test_file is None:
        raise gr.Error("Please upload the Private Test Set CSV file before submitting.")

    # (uploaded file, label used in upload errors, dataset label, label used
    # in the confirmation message) - cross-validation first, then test set.
    uploads = (
        (cv_file, "GDPa1 Cross-Validation", "GDPa1_cross_validation", "Cross-Validation"),
        (test_file, "Private Test Set", "Heldout Test Set", "Test Set"),
    )

    # Validate every file before uploading anything, so an invalid second
    # file cannot leave a lone first submission behind.
    validated = [
        (submission_type, display_name, _read_validated_csv(file, upload_label, submission_type))
        for file, upload_label, submission_type, display_name in uploads
    ]

    # If validation passes, make submissions for both files
    messages = []
    for submission_type, display_name, file_content in validated:
        # Upload submission without re-validating (already done)
        upload_submission(
            file_content=file_content,
            user_state=user_state,
            submission_type=submission_type,
            model_name=model_name,
            model_description=model_description,
            anonymous=anonymous,
        )
        messages.append(
            f"✅ {display_name}: Your submission has been received! Your results should appear on the leaderboard within a minute."
        )

    return "\n\n".join(messages)