loodvanniekerkginkgo committed on
Commit
3edbc93
·
1 Parent(s): 9e6aa1f

Adding FAQs, moving submission and utils code around

Browse files
Files changed (4) hide show
  1. about.py +26 -1
  2. app.py +47 -122
  3. submit.py +15 -38
  4. utils.py +13 -1
about.py CHANGED
@@ -21,4 +21,29 @@ CACHE_PATH=os.getenv("HF_HOME", ".")
21
  API = HfApi(token=TOKEN)
22
  organization="ginkgo-datapoints"
23
  submissions_repo = f'{organization}/abdev-bench-submissions'
24
- results_repo = f'{organization}/abdev-bench-results'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  API = HfApi(token=TOKEN)
22
  organization="ginkgo-datapoints"
23
  submissions_repo = f'{organization}/abdev-bench-submissions'
24
+ results_repo = f'{organization}/abdev-bench-results'
25
+
26
+ ABOUT_TEXT = """
27
+ ## About this challenge
28
+
29
+ We're inviting the ML/bio community to predict developability properties for 244 antibodies from the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1).
30
+
31
+ **What is antibody developability?**
32
+
33
+ Antibodies have to be manufacturable, stable in high concentrations, and have low off-target effects.
34
+ Properties such as these can often hinder the progression of an antibody to the clinic, and are collectively referred to as 'developability'.
35
+ Here we show 5 of these properties and invite the community to submit and develop better predictors, which will be tested out on a heldout private set to assess model generalization.
36
+
37
+ **How to submit?**
38
+
39
+ TODO
40
+
41
+ **How to evaluate?**
42
+
43
+ TODO
44
+
45
+ FAQs: A list of frequently asked questions.
46
+ """
47
+ FAQS = {
48
+ "Example FAQ with dropdown": """Full answer to this question""",
49
+ }
app.py CHANGED
@@ -5,57 +5,10 @@ import pandas as pd
5
  import gradio as gr
6
  from gradio_leaderboard import Leaderboard
7
 
8
- from utils import read_submission_from_hub, write_results
9
- from about import ASSAY_LIST, ASSAY_RENAME, ASSAY_EMOJIS, submissions_repo, API, results_repo
10
- from typing import BinaryIO, Literal
11
- from datetime import datetime
12
- import tempfile
13
- from datasets import load_dataset
14
- import io
15
-
16
- def make_submission(
17
- submitted_file: BinaryIO,
18
- user_state):
19
-
20
- if user_state is None:
21
- raise gr.Error("You must submit your username to submit a file.")
22
-
23
- file_path = submitted_file.name
24
-
25
- if not file_path:
26
- raise gr.Error("Uploaded file object does not have a valid file path.")
27
-
28
- path_obj = Path(file_path)
29
- timestamp = datetime.utcnow().isoformat()
30
-
31
- with (path_obj.open("rb") as f_in):
32
- file_content = f_in.read().decode("utf-8")
33
-
34
- # write to dataset
35
- filename = f"{user_state}/{timestamp.replace(':', '-')}_{user_state}.json"
36
- record = {
37
- "submission_filename": filename,
38
- "submission_time": timestamp,
39
- "csv_content": file_content,
40
- "evaluated": False,
41
- "user": user_state,
42
- }
43
- with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
44
- json.dump(record, tmp, indent=2)
45
- tmp.flush()
46
- tmp_name = tmp.name
47
-
48
- API.upload_file(
49
- path_or_fileobj=tmp_name,
50
- path_in_repo=filename,
51
- repo_id=submissions_repo,
52
- repo_type="dataset",
53
- commit_message=f"Add submission for {user_state} at {timestamp}"
54
- )
55
- Path(tmp_name).unlink()
56
-
57
- return "✅ Your submission has been received! Sit tight and your scores will appear on the leaderboard shortly."
58
-
59
 
60
  def get_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
61
  # ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
@@ -92,16 +45,6 @@ def get_leaderboard_object(df_results: pd.DataFrame, assay: str | None = None):
92
  render=True
93
  )
94
 
95
- def show_output_box(message):
96
- return gr.update(value=message, visible=True)
97
-
98
- def fetch_hf_results():
99
- ds = load_dataset(results_repo, split='no_low_spearman', download_mode="force_redownload")
100
- df = pd.DataFrame(ds).drop_duplicates(subset=["model", "assay"])
101
- df["property"] = df["assay"].map(ASSAY_RENAME)
102
- print(df.head())
103
- return df
104
-
105
  with gr.Blocks() as demo:
106
  gr.Markdown("""
107
  ## Welcome to the Ginkgo Antibody Developability Benchmark!
@@ -148,71 +91,53 @@ with gr.Blocks() as demo:
148
  elem_classes=["resized-image"],
149
  show_download_button=False,
150
  )
 
 
 
 
 
151
  gr.Markdown(
152
  """
153
- ## About this challenge
154
-
155
- We're inviting the ML/bio community to predict developability properties for 244 antibodies from the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1).
156
-
157
- **What is antibody developability?**
158
-
159
- Antibodies have to be manufacturable, stable in high concentrations, and have low off-target effects.
160
- Properties such as these can often hinder the progression of an antibody to the clinic, and are collectively referred to as 'developability'.
161
- Here we show 5 of these properties and invite the community to submit and develop better predictors, which will be tested out on a heldout private set to assess model generalization.
162
-
163
- **How to submit?**
164
-
165
- TODO
166
-
167
- **How to evaluate?**
168
-
169
- TODO
170
  """
171
  )
172
-
173
- with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):
174
- gr.Markdown(
175
- """
176
- # Antibody Developability Submission
177
- Upload a CSV to get a score!
178
- """
179
- )
180
- filename = gr.State(value=None)
181
- eval_state = gr.State(value=None)
182
- user_state = gr.State(value=None)
183
-
184
- # gr.LoginButton()
185
-
186
- with gr.Row():
187
- with gr.Column():
188
- username_input = gr.Textbox(
189
- label="Username",
190
- placeholder="Enter your Hugging Face username",
191
- info="This will be displayed on the leaderboard."
192
- )
193
- with gr.Column():
194
- boundary_file = gr.File(label="Submission CSV")
195
-
196
- username_input.change(
197
- fn=lambda x: x if x.strip() else None,
198
- inputs=username_input,
199
- outputs=user_state
200
- )
201
-
202
- submit_btn = gr.Button("Evaluate")
203
- message = gr.Textbox(label="Status", lines=1, visible=False)
204
- # help message
205
- gr.Markdown("If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space.")
206
-
207
- submit_btn.click(
208
- make_submission,
209
- inputs=[boundary_file, user_state],
210
- outputs=[message],
211
- ).then(
212
- fn=show_output_box,
213
- inputs=[message],
214
- outputs=[message],
215
- )
216
 
217
 
218
  if __name__ == "__main__":
 
5
  import gradio as gr
6
  from gradio_leaderboard import Leaderboard
7
 
8
+ from utils import fetch_hf_results, show_output_box
9
+ from about import ASSAY_LIST, ASSAY_RENAME, ASSAY_EMOJIS, submissions_repo, API
10
+ from submit import make_submission
11
+ from about import ABOUT_TEXT, FAQS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  def get_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
14
  # ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
 
45
  render=True
46
  )
47
 
 
 
 
 
 
 
 
 
 
 
48
  with gr.Blocks() as demo:
49
  gr.Markdown("""
50
  ## Welcome to the Ginkgo Antibody Developability Benchmark!
 
91
  elem_classes=["resized-image"],
92
  show_download_button=False,
93
  )
94
+ gr.Markdown(ABOUT_TEXT)
95
+ for question, answer in FAQS.items():
96
+ gr.Accordion(question, answer)
97
+
98
+ with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):
99
  gr.Markdown(
100
  """
101
+ # Antibody Developability Submission
102
+ Upload a CSV to get a score!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  """
104
  )
105
+ filename = gr.State(value=None)
106
+ eval_state = gr.State(value=None)
107
+ user_state = gr.State(value=None)
108
+
109
+ # gr.LoginButton()
110
+
111
+ with gr.Row():
112
+ with gr.Column():
113
+ username_input = gr.Textbox(
114
+ label="Username",
115
+ placeholder="Enter your Hugging Face username",
116
+ info="This will be displayed on the leaderboard."
117
+ )
118
+ with gr.Column():
119
+ boundary_file = gr.File(label="Submission CSV")
120
+
121
+ username_input.change(
122
+ fn=lambda x: x if x.strip() else None,
123
+ inputs=username_input,
124
+ outputs=user_state
125
+ )
126
+
127
+ submit_btn = gr.Button("Evaluate")
128
+ message = gr.Textbox(label="Status", lines=1, visible=False)
129
+ # help message
130
+ gr.Markdown("If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space.")
131
+
132
+ submit_btn.click(
133
+ make_submission,
134
+ inputs=[boundary_file, user_state],
135
+ outputs=[message],
136
+ ).then(
137
+ fn=show_output_box,
138
+ inputs=[message],
139
+ outputs=[message],
140
+ )
 
 
 
 
 
 
 
 
141
 
142
 
143
  if __name__ == "__main__":
submit.py CHANGED
@@ -1,58 +1,37 @@
1
- import pathlib
2
  from pathlib import Path
3
  import tempfile
4
- from typing import BinaryIO, Literal
5
  import json
6
- import pandas as pd
7
 
8
  import gradio as gr
9
- from datasets import load_dataset, Dataset
10
- from huggingface_hub import upload_file, hf_hub_download
11
- from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
12
- from evaluation import evaluate_problem
13
  from datetime import datetime
14
- import os
15
- from huggingface_hub import HfApi
16
 
17
- from about import PROBLEM_TYPES, TOKEN, CACHE_PATH, API, submissions_repo, results_repo
 
 
 
 
18
 
19
- def submit_boundary(
20
- problem_type: Literal["geometrical", "simple_to_build", "mhd_stable"],
21
- boundary_file: BinaryIO,
22
- user_state
23
- ) -> str:
24
-
25
- # profile: gr.OAuthProfile | None
26
- # user_state = profile.username
27
-
28
- # error handling
29
- # if profile.username is None:
30
  if user_state is None:
31
  raise gr.Error("You must submit your username to submit a file.")
32
 
33
- file_path = boundary_file.name
34
 
35
  if not file_path:
36
  raise gr.Error("Uploaded file object does not have a valid file path.")
37
 
38
- path_obj = pathlib.Path(file_path)
39
  timestamp = datetime.utcnow().isoformat()
40
 
41
- with (
42
- path_obj.open("rb") as f_in,
43
- tempfile.NamedTemporaryFile(delete=False, suffix=".json") as tmp_boundary,
44
- ):
45
- file_content = f_in.read()
46
- tmp_boundary.write(file_content)
47
- tmp_boundary_path = pathlib.Path(tmp_boundary.name)
48
 
49
  # write to dataset
50
- filename = f"{problem_type}/{timestamp.replace(':', '-')}_{problem_type}.json"
51
  record = {
52
  "submission_filename": filename,
53
  "submission_time": timestamp,
54
- "problem_type": problem_type,
55
- "boundary_json": file_content.decode("utf-8"),
56
  "evaluated": False,
57
  "user": user_state,
58
  }
@@ -66,10 +45,8 @@ def submit_boundary(
66
  path_in_repo=filename,
67
  repo_id=submissions_repo,
68
  repo_type="dataset",
69
- commit_message=f"Add submission for {problem_type} at {timestamp}"
70
  )
71
- pathlib.Path(tmp_name).unlink()
72
-
73
- tmp_boundary_path.unlink()
74
 
75
- return "✅ Your submission has been received! Sit tight and your scores will appear on the leaderboard shortly.", filename
 
 
1
  from pathlib import Path
2
  import tempfile
3
+ from typing import BinaryIO
4
  import json
 
5
 
6
  import gradio as gr
 
 
 
 
7
  from datetime import datetime
 
 
8
 
9
+ from about import API, submissions_repo
10
+
11
+ def make_submission(
12
+ submitted_file: BinaryIO,
13
+ user_state):
14
 
 
 
 
 
 
 
 
 
 
 
 
15
  if user_state is None:
16
  raise gr.Error("You must submit your username to submit a file.")
17
 
18
+ file_path = submitted_file.name
19
 
20
  if not file_path:
21
  raise gr.Error("Uploaded file object does not have a valid file path.")
22
 
23
+ path_obj = Path(file_path)
24
  timestamp = datetime.utcnow().isoformat()
25
 
26
+ with (path_obj.open("rb") as f_in):
27
+ file_content = f_in.read().decode("utf-8")
 
 
 
 
 
28
 
29
  # write to dataset
30
+ filename = f"{user_state}/{timestamp.replace(':', '-')}_{user_state}.json"
31
  record = {
32
  "submission_filename": filename,
33
  "submission_time": timestamp,
34
+ "csv_content": file_content,
 
35
  "evaluated": False,
36
  "user": user_state,
37
  }
 
45
  path_in_repo=filename,
46
  repo_id=submissions_repo,
47
  repo_type="dataset",
48
+ commit_message=f"Add submission for {user_state} at {timestamp}"
49
  )
50
+ Path(tmp_name).unlink()
 
 
51
 
52
+ return "✅ Your submission has been received! Sit tight and your scores will appear on the leaderboard shortly."
utils.py CHANGED
@@ -3,9 +3,11 @@ import tempfile
3
  import json
4
 
5
  import gradio as gr
 
 
6
  from huggingface_hub import hf_hub_download
7
 
8
- from about import API, submissions_repo, results_repo
9
 
10
  # def make_user_clickable(name):
11
  # link =f'https://huggingface.co/{name}'
@@ -15,6 +17,16 @@ from about import API, submissions_repo, results_repo
15
  # link =f'https://huggingface.co/datasets/proxima-fusion/constellaration-bench-results/blob/main/{filename}'
16
  # return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">link</a>'
17
 
 
 
 
 
 
 
 
 
 
 
18
  def read_result_from_hub(filename):
19
  local_path = hf_hub_download(
20
  repo_id=results_repo,
 
3
  import json
4
 
5
  import gradio as gr
6
+ import pandas as pd
7
+ from datasets import load_dataset
8
  from huggingface_hub import hf_hub_download
9
 
10
+ from about import API, submissions_repo, results_repo, ASSAY_RENAME
11
 
12
  # def make_user_clickable(name):
13
  # link =f'https://huggingface.co/{name}'
 
17
  # link =f'https://huggingface.co/datasets/proxima-fusion/constellaration-bench-results/blob/main/{filename}'
18
  # return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">link</a>'
19
 
20
+ def show_output_box(message):
21
+ return gr.update(value=message, visible=True)
22
+
23
+ def fetch_hf_results():
24
+ ds = load_dataset(results_repo, split='no_low_spearman', download_mode="force_redownload")
25
+ df = pd.DataFrame(ds).drop_duplicates(subset=["model", "assay"])
26
+ df["property"] = df["assay"].map(ASSAY_RENAME)
27
+ print(df.head())
28
+ return df
29
+
30
  def read_result_from_hub(filename):
31
  local_path = hf_hub_download(
32
  repo_id=results_repo,