File size: 5,282 Bytes
63bdadc 3edbc93 63bdadc 6921c9e 9021dc4 63bdadc 471531b e0516fa 3edbc93 50e75cf 10e69e7 672339b b2a1e67 10e69e7 1de74c6 b2a1e67 672339b 6921c9e 813ce52 63bdadc 672339b e059a0b 672339b 63bdadc 672339b 63bdadc 2dafeb1 10e69e7 672339b 10e69e7 1de74c6 471531b 6c1a62b 10e69e7 672339b 6c1a62b 10e69e7 62b6599 471531b 2dafeb1 10e69e7 672339b 10e69e7 672339b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
from pathlib import Path
import tempfile
from typing import BinaryIO
import json
import gradio as gr
from datetime import datetime, timezone
import uuid
from constants import API, SUBMISSIONS_REPO, REGISTRATION_CODE
from validation import validate_csv_file, validate_username
def upload_submission(
    file_content: str,
    user_state,
    submission_type: str,
    model_name: str,
    model_description: str,
    anonymous: bool = False,
):
    """Upload one submission record to the submissions dataset repo.

    No validation is performed here; the caller is expected to have validated
    the CSV content and the username already.

    Args:
        file_content: CSV text of the submission, stored verbatim in the record.
        user_state: Submitting user's name; used as the top-level folder of the
            uploaded file and stored in the record's "user" field.
        submission_type: Label stored in the record's "dataset" field.
        model_name: Model name, stored in the record and embedded in the file name.
        model_description: Free-text description stored in the record.
        anonymous: Value stored in the record's "anonymous" field.
    """
    # Take a single clock reading so the timestamp and the date cannot
    # disagree when the call straddles midnight UTC.
    now = datetime.now(timezone.utc)
    timestamp = now.isoformat()
    date = now.date().isoformat()
    submission_id = str(uuid.uuid4())

    # Path separators in a user-supplied model name would otherwise create
    # unintended nested folders in the repo. The record keeps the original name.
    safe_model_name = model_name.replace("/", "_").replace("\\", "_")

    # write to dataset
    filename = f"{user_state}/{date}_{safe_model_name}_{submission_id}.json"  # Writing to a bit more structured name

    # Note: This may be represented as Parquet in the future, so the schema needs to stay consistent
    record = {
        "submission_id": submission_id,
        "submission_filename": filename,
        "submission_time": timestamp,
        "evaluated": False,
        "user": user_state,
        "model_name": model_name,
        "model_description": model_description,
        "csv_content": file_content,
        "dataset": submission_type,
        "anonymous": anonymous,
    }

    tmp = tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False, encoding="utf-8"
    )
    tmp_path = Path(tmp.name)
    try:
        # Leaving the `with` closes (and therefore flushes) the file before upload.
        with tmp:
            # Note: No indent because indents and large text contents cause this error: https://github.com/huggingface/datasets/issues/3227
            json.dump(record, tmp)

        API.upload_file(
            path_or_fileobj=str(tmp_path),
            path_in_repo=filename,
            repo_id=SUBMISSIONS_REPO,
            repo_type="dataset",
            commit_message=f"Add submission for {user_state} at {timestamp}",
        )
    finally:
        # Always remove the temp file, including when serialization or upload fails.
        tmp_path.unlink(missing_ok=True)
def validate_file_requirements(file: BinaryIO, file_type: str) -> Path:
    """Check that an uploaded file has a path ending in .csv and return that path.

    Args:
        file: Uploaded file object; its ``name`` attribute is read as the path.
        file_type: Human-readable label for the file, used in error messages.

    Returns:
        The file's path as a ``Path`` object.

    Raises:
        gr.Error: If the file object has an empty path, or if the extension
            (compared case-insensitively) is not ".csv".
    """
    upload_location = file.name
    if upload_location:
        candidate = Path(upload_location)
        extension = candidate.suffix.lower()
        if extension == ".csv":
            return candidate
        raise gr.Error(
            f"{file_type} file must be a CSV file. Please upload a .csv file."
        )
    raise gr.Error(
        f"Uploaded {file_type} file object does not have a valid file path."
    )
def _read_csv_text(file: BinaryIO, file_type: str) -> str:
    """Check that the upload is a .csv file and return its contents as text."""
    path = validate_file_requirements(file, file_type)
    try:
        # "utf-8-sig" reads plain UTF-8 unchanged and drops a leading BOM, so
        # the BOM does not end up glued to the first header cell.
        return path.read_bytes().decode("utf-8-sig")
    except UnicodeDecodeError as err:
        raise gr.Error(
            f"{file_type} file could not be read as UTF-8 text. Please re-save it as a UTF-8 encoded CSV file."
        ) from err


def make_submission(
    cv_file: BinaryIO,
    test_file: BinaryIO,
    user_state,
    model_name: str = "",
    model_description: str = "",
    anonymous: bool = False,
    registration_code: str = "",
    # profile: gr.OAuthProfile | None = None,
):
    """
    Make submissions for both GDPa1 cross-validation and private test set files.
    Both files are required. Validates both files before making any submissions.

    Args:
        cv_file: Uploaded GDPa1 cross-validation CSV file.
        test_file: Uploaded private test set CSV file.
        user_state: Submitting user's name; checked with ``validate_username``.
        model_name: Required model name (surrounding whitespace is stripped).
        model_description: Optional description (surrounding whitespace is stripped).
        anonymous: Passed through to each uploaded submission.
        registration_code: Compared, stripped and upper-cased, against
            ``REGISTRATION_CODE``.

    Returns:
        One confirmation message per uploaded file, joined by blank lines.

    Raises:
        gr.Error: If the model name is missing, the registration code does not
            match, a file is missing, is not a .csv file, or is not UTF-8 text.
            ``validate_username`` and ``validate_csv_file`` are also called and
            may reject the input.
    """
    # if profile:
    #     user_state = profile.name
    validate_username(user_state)

    # `or ""` guards against a text field arriving as None instead of "".
    model_name = (model_name or "").strip()
    model_description = (model_description or "").strip()

    if not model_name:
        raise gr.Error("Please provide a model name.")

    if str(registration_code).strip().upper() != REGISTRATION_CODE:
        raise gr.Error(
            "Invalid registration code. Please register on the <a href='https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition'>Competition Registration page</a> or email <a href='mailto:[email protected]'>[email protected]</a>."
        )

    if cv_file is None:
        raise gr.Error(
            "Please upload the GDPa1 Cross-Validation CSV file before submitting."
        )
    if test_file is None:
        raise gr.Error("Please upload the Private Test Set CSV file before submitting.")

    # (label used in error messages, dataset name, label used in the result, file)
    uploads = (
        ("GDPa1 Cross-Validation", "GDPa1_cross_validation", "Cross-Validation", cv_file),
        ("Private Test Set", "Heldout Test Set", "Test Set", test_file),
    )

    # Validate every file first so nothing is uploaded if any file is rejected.
    validated = []
    for file_label, submission_type, display_name, file in uploads:
        content = _read_csv_text(file, file_label)
        validate_csv_file(content, submission_type)
        validated.append((submission_type, display_name, content))

    # If validation passes, make submissions for both files
    messages = []
    for submission_type, display_name, content in validated:
        # Upload submission without re-validating (already done)
        upload_submission(
            file_content=content,
            user_state=user_state,
            submission_type=submission_type,
            model_name=model_name,
            model_description=model_description,
            anonymous=anonymous,
        )
        messages.append(
            f"✅ {display_name}: Your submission has been received! Your results should appear on the leaderboard within a minute."
        )

    return "\n\n".join(messages)
|