SushantGautam committed
Commit
5e6ba6a
·
1 Parent(s): 947c62b

Add submission portal and task scripts for MediaEval Medico 2025 challenge

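All the new scripts coordinate through one filename convention: a submission is uploaded as "<username>-_-_-<unix_timestamp>-_-_-<task>.json", which the portal splits back apart to build its leaderboard rows. A minimal sketch of that round trip (the filename here is hypothetical):

import os
from datetime import datetime, timezone

# Hypothetical filename following the convention used throughout this commit
fname = "alice-_-_-1718000000-_-_-task1.json"
username, ts, task = os.path.splitext(fname)[0].split("-_-_-")
submitted = datetime.fromtimestamp(int(ts), tz=timezone.utc)
print(username, task, submitted)  # -> alice task1 2024-06-10 06:13:20+00:00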
gradio_interface.py → medvqa/competitions/gi-2025/submission_portal.py RENAMED
File without changes
medvqa/competitions/medico-2025/__init__.py ADDED
@@ -0,0 +1 @@
+ # This file can be empty or contain package initialization code
medvqa/competitions/medico-2025/submission_portal.py ADDED
@@ -0,0 +1,186 @@
+ import sys
+ import subprocess
+ import gradio as gr
+ import json
+ from datetime import datetime, timezone
+ from huggingface_hub import upload_file, snapshot_download
+ import shutil
+ import os
+ import glob
+ from pathlib import Path
+ from huggingface_hub import whoami
+ import platform
+
+ print(subprocess.check_output(
+     [sys.executable, "-m", "pip", "list"]).decode("utf-8"))
+ print({
+     "python": platform.python_version(),
+     "os": platform.system(),
+     "platform": platform.platform(),
+     "arch": platform.machine()
+ })
+ print("Account token used to connect to HuggingFace: ", whoami()['name'])
+
+
+ SUBMISSION_REPO = "SimulaMet/medico-2025-submissions"
+ hub_path = None
+ submissions = None
+ last_submission_update_time = datetime.now(timezone.utc)
+
+
+ def refresh_submissions():
+     global hub_path, submissions, last_submission_update_time
+     if hub_path and Path(hub_path).exists():
+         shutil.rmtree(hub_path, ignore_errors=True)
+         print("Deleted existing submissions")
+
+     hub_path = snapshot_download(
+         repo_type="dataset", repo_id=SUBMISSION_REPO, allow_patterns=['**/*.json'])
+     print("Downloaded submissions to:", hub_path)
+     if not os.path.exists(hub_path):
+         os.makedirs(hub_path)
+
+     all_jsons = glob.glob(hub_path + "/**/*.json", recursive=True)
+     print("json_files count:", len(all_jsons))
+
+     submissions = []
+     for file in all_jsons:
+         # Filenames follow the convention <username>-_-_-<timestamp>-_-_-<task>.json
+         file_ = file.split("/")[-1]
+         username, sub_timestamp, task = file_.replace(
+             ".json", "").split("-_-_-")
+         json_data = json.load(open(file))
+         public_score = json.dumps(json_data.get("public_scores", {}))
+         submissions.append({"user": username, "task": task, "public_score": public_score,
+                             "submitted_time": sub_timestamp})
+
+     last_submission_update_time = datetime.now(timezone.utc)
+     return hub_path
+
+
+ hub_path = refresh_submissions()
+ hub_dir = hub_path.split("snapshot")[0] + "snapshot"
+
+
+ def time_ago(submitted_time):
+     # Formats the Unix timestamp as an absolute UTC time string
+     return str(datetime.fromtimestamp(int(submitted_time), tz=timezone.utc)) + " UTC"
+
+
+ def filter_submissions(task_type, search_query):
+     if search_query == "":
+         filtered = [s for s in submissions if task_type ==
+                     "all" or s["task"] == task_type]
+     else:
+         filtered = [s for s in submissions if (
+             task_type == "all" or s["task"] == task_type) and search_query.lower() in s["user"].lower()]
+     return [{"user": s["user"], "task": s["task"], "public_score": s["public_score"], "submitted_time": time_ago(s["submitted_time"])} for s in filtered]
+
+
+ def display_submissions(task_type="all", search_query=""):
+     # Re-download the submissions if the cached copy is older than an hour
+     if submissions is None or ((datetime.now(timezone.utc) - last_submission_update_time).total_seconds() > 3600):
+         refresh_submissions()
+     filtered_submissions = filter_submissions(task_type, search_query)
+     return [[s["user"], s["task"], s["submitted_time"], s["public_score"]] for s in filtered_submissions]
+
+
+ def add_submission(file):
+     global submissions
+     try:
+         with open(file, 'r', encoding='utf-8') as f:
+             data = json.load(f)
+
+         filename = os.path.basename(file)
+         username, sub_timestamp, task = filename.replace(
+             ".json", "").split("-_-_-")
+         submission_time = datetime.fromtimestamp(
+             int(sub_timestamp), tz=timezone.utc)
+
+         assert task in ["task1", "task2"], "Invalid task type"
+         assert len(username) > 0, "Invalid username"
+         assert submission_time < datetime.now(
+             timezone.utc), "Invalid submission time"
+
+         upload_file(
+             repo_type="dataset",
+             path_or_fileobj=file,
+             path_in_repo=task + "/" + filename,
+             repo_id=SUBMISSION_REPO
+         )
+         refresh_submissions()
+         return "💪🏆🎉 Submission registered successfully!"
+     except Exception as e:
+         return f"❌ Error adding submission: {e}"
+
+
+ def refresh_page():
+     return "Pong! Submission server is alive! 😊"
+
+
+ # Define the Gradio interface
+ with gr.Blocks(title="🌟 MediaEval Medico 2025 Submissions 🌟") as demo:
+     gr.Markdown("""
+ # 🌟 Welcome to the official submission portal for the **[MediaEval Medico 2025](https://multimediaeval.github.io/editions/2025/tasks/medico/)** challenge! 🏥🔍
+ ### 📋 [**GitHub Repository**](https://github.com/simula/MediaEval-Medico-2025) | 🔗 [**MediaEval 2025 Task Page**](https://multimediaeval.github.io/editions/2025/tasks/medico/)
+ ---
+ The **MediaEval Medico 2025 Challenge** 🔬 focuses on **Visual Question Answering (VQA)** for **gastrointestinal (GI) imaging**, emphasizing **explainability** 🤔📖 to foster **trustworthy AI** for clinical adoption ⚕️.
+ This task continues the long-running **Medico series** at MediaEval and leverages the newly developed **Kvasir-VQA-x1** dataset, enabling **multimodal reasoning** and **interpretable clinical decision support** 📈.
+ ### 📦 [**Available Datasets**](https://github.com/simula/MediaEval-Medico-2025#-dataset-overview-kvasir-vqa-x1) | 🧠 [**Task Details & Training Resources**](https://github.com/simula/MediaEval-Medico-2025?tab=readme-ov-file#-task-descriptions) | 📝 [**Submission Instructions**](https://github.com/simula/MediaEval-Medico-2025#submission)
+ ---
+ """)
+
+     with gr.Tab("View Submissions"):
+         gr.Markdown("### Filter and Search Submissions")
+
+         with gr.Row():
+             with gr.Column(scale=1):
+                 task_type_dropdown = gr.Dropdown(
+                     choices=["all", "task1", "task2"],
+                     value="all",
+                     label="Task Type"
+                 )
+                 search_box = gr.Textbox(
+                     label="Search by Username",
+                     placeholder="Enter username..."
+                 )
+
+             with gr.Column(scale=6):
+                 output_table = gr.Dataframe(
+                     headers=["User", "Task", "Submitted Time", "Public Score"],
+                     interactive=False,
+                     wrap=True,
+                     column_widths=["100px", "50px", "80px", "200px"],
+                     label="Submissions"
+                 )
+
+         task_type_dropdown.change(
+             fn=display_submissions,
+             inputs=[task_type_dropdown, search_box],
+             outputs=output_table
+         )
+         search_box.change(
+             fn=display_submissions,
+             inputs=[task_type_dropdown, search_box],
+             outputs=output_table
+         )
+
+         gr.Markdown(
+             f'''
+ 🔄 Last refreshed: {last_submission_update_time.strftime('%Y-%m-%d %H:%M:%S')} UTC | 📊 Total Submissions: {len(submissions)}
+
+ 💬 For any questions or issues, [contact the organizers](https://github.com/simula/MediaEval-Medico-2025#-organizers) or check the documentation in the [GitHub repo](https://github.com/simula/MediaEval-Medico-2025). Good luck, and thank you for contributing to medical AI research! 💪🤖🌍
+ ''')
+
+     with gr.Tab("Upload Submission", visible=False):
+         file_input = gr.File(label="Upload JSON", file_types=[".json"])
+         upload_output = gr.Textbox(label="Upload Result")
+         file_input.upload(fn=add_submission,
+                           inputs=file_input, outputs=upload_output)
+
+     with gr.Tab("Refresh API", visible=False):
+         refresh_button = gr.Button("Refresh")
+         status_output = gr.Textbox(label="Status")
+         refresh_button.click(fn=refresh_page, inputs=[], outputs=status_output)
+
+     demo.load(lambda: display_submissions("all", ""),
+               inputs=[], outputs=output_table)
+
+ demo.launch()
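The two hidden tabs above ("Upload Submission" and "Refresh API") are what the task scripts below drive programmatically through gradio_client. A minimal sketch of that client-side flow against this portal's Space, SimulaMet/medvqa (the submission filename is hypothetical):

from gradio_client import Client, handle_file

client = Client("SimulaMet/medvqa")              # the Space hosting this portal
print(client.predict(api_name="/refresh_page"))  # liveness check: "Pong! ..."
# Registering a submission file named per the portal's convention:
# client.predict(file=handle_file("alice-_-_-1718000000-_-_-task1.json"),
#                api_name="/add_submission")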
medvqa/competitions/medico-2025/task_1.py ADDED
@@ -0,0 +1,162 @@
+ from gradio_client import Client, handle_file
+ from huggingface_hub import snapshot_download, login, whoami
+ import argparse
+ import os
+ import sys
+ import subprocess as sp
+ import time
+ from datetime import datetime, timezone
+ import shutil
+ import json
+ from huggingface_hub import HfApi, grant_access
+ import re
+
+ HF_GATE_ACESSLIST = ["SushantGautam",
+                      "stevenah", "vlbthambawita"]
+
+ MEDVQA_SUBMIT = True if os.environ.get(
+     '_MEDVQA_SUBMIT_FLAG_', 'FALSE') == 'TRUE' else False
+ parser = argparse.ArgumentParser(description='Run GI-2025 Task 1 (VQA)')
+ parser.add_argument('--repo_id', type=str, required=True,
+                     help='Path to the HF submission repository')
+ args, _ = parser.parse_known_args()
+
+ os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+ submission_file = "submission_task1.py"
+ file_from_validation = "predictions_1.json"
+
+ min_library = ["datasets>=3.4.1", "transformers", "evaluate",
+                "rouge_score", 'tqdm', "gradio_client>=1.8.0"]
+
+ print("🌟 ImageCLEFmed-MEDVQA-GI-2025 🌟",
+       "https://github.com/simula/ImageCLEFmed-MEDVQA-GI-2025")
+ print("🔍 Subtask 1: Algorithm Development for Question Interpretation and Response")
+ print(f"👀 Analyzing submission repository: {args.repo_id} 👀")
+
+ try:
+     print(f"Logged in to HuggingFace as: {whoami()['name']}")
+ except Exception:
+     print("⚠️⚠️ Not logged in to HuggingFace! Please get your login token from https://huggingface.co/settings/tokens 🌐")
+     login()
+
+ client = Client("SimulaMet/medvqa")
+ print("💓 Communicating with the Submission Server: Ping!")
+ result = client.predict(
+     api_name="/refresh_page"
+ )
+ print(result)
+
+
+ hf_username = whoami()['name']
+ assert len(hf_username) > 0, "🚫 HuggingFace login failed for some reason"
+ current_timestamp = int(time.time())
+
+ snap_dir = snapshot_download(
+     repo_id=args.repo_id, allow_patterns=[submission_file, "requirements.txt"])
+
+ if not os.path.isfile(os.path.join(snap_dir, submission_file)):
+     raise FileNotFoundError(
+         f"Submission file '{submission_file}' not found in the repository!")
+
+ if os.path.isfile(os.path.join(snap_dir, file_from_validation)):
+     os.remove(os.path.join(snap_dir, file_from_validation))
+
+ print("📦 Making sure the minimum requirements to run the script are installed 📦")
+ sp.run(["python", "-m", "pip", "install", "-q"] + min_library, check=True)
+
+ if os.path.isfile(os.path.join(snap_dir, "requirements.txt")):
+     print(
+         f"📦 Installing requirements from the submission repo: {args.repo_id}/requirements.txt")
+     sp.run(["python", "-m", "pip", "install", "-q", "-r",
+             f"{snap_dir}/requirements.txt"], cwd=snap_dir, check=True)
+
+
+ if os.environ.get("_MEDVQA_CHALLENGE_EVALUATE_FLAG_", "FALSE") == "TRUE":
+     # Patch the submission file for challenge evaluation
+     challenge_file = submission_file.replace(".py", "_challenge.py")
+     submission_path = os.path.join(snap_dir, submission_file)
+     challenge_path = os.path.join(snap_dir, challenge_file)
+     with open(submission_path, "r", encoding="utf-8") as f:
+         code = f.read()
+     # Use a regex to match the expected load_dataset line, ignoring whitespace
+     pattern = r'val_dataset\s*=\s*load_dataset\(\s*["\']SimulaMet/Kvasir-VQA-test["\']\s*,\s*split\s*=\s*["\']validation["\']\s*\)'
+     new_line = 'val_dataset = load_dataset("SimulaMet/Kvasir-VQA-private", split="test")'
+     if re.search(pattern, code):
+         code = re.sub(pattern, new_line, code)
+         with open(challenge_path, "w", encoding="utf-8") as f:
+             f.write(code)
+         submission_file = challenge_file
+         print(f"🔄 Challenge file created at: {challenge_path}")
+     else:
+         print("⚠️ Challenge patch not applied: expected line not found in submission file.")
+         sys.exit(
+             "Please check the submission file for compatibility with challenge evaluation.")
+
+
+ sp.run(["python", f"{snap_dir}/{submission_file}"],
+        cwd=snap_dir, check=True)
+ print(
+     f"🎉 The submission script ran successfully; the intermediate files are at {snap_dir}")
+
+ if not MEDVQA_SUBMIT:
+     print("\nYou can now run the 'medvqa validate_and_submit ...' command to submit the task.")
+ else:
+     print("🚀 Preparing for submission 🚀")
+     file_path_to_upload = os.path.join(
+         snap_dir, f"{hf_username}-_-_-{current_timestamp}-_-_-task1.json")
+     shutil.copy(os.path.join(snap_dir, file_from_validation),
+                 file_path_to_upload)
+     # Add repo_id to the submission file
+     with open(file_path_to_upload, 'r', encoding='utf-8') as f:
+         data = json.load(f)
+     data['repo_id'] = args.repo_id
+     with open(file_path_to_upload, 'w', encoding='utf-8') as f:
+         json.dump(data, f, ensure_ascii=False)
+     api = HfApi()
+     api.update_repo_visibility(args.repo_id, private=False)  # Make public
+     api.update_repo_settings(
+         args.repo_id, gated='manual')  # Enable gated access
+     for user in HF_GATE_ACESSLIST:
+         try:
+             grant_access(args.repo_id, user)  # Grant access to organizers
+         except Exception as e:
+             print(user, ":", e)
+     print(
+         f'''✅ The {args.repo_id} model is now public but gated, and is shared with the organizers.
+         You should not make the model private or remove/update it until the competition results are announced.
+         Feel free to re-submit the task if you change the model on the repository.
+         We will notify you if there are any issues with the submission.
+         ''')
+
+     result = client.predict(
+         file=handle_file(file_path_to_upload),
+         api_name="/add_submission"
+     )
+     print({"User": hf_username, "Task": "task1",
+            "Submitted_time": str(datetime.fromtimestamp(int(current_timestamp), tz=timezone.utc)) + " UTC"
+            })
+     print(result)
+     print("Visit this URL to see the entry: 👇")
+     Client("SimulaMet/medvqa")  # Prints the portal URL on connection
+
+
+ if os.environ.get("_MEDVQA_CHALLENGE_EVALUATE_FLAG_", "FALSE") == "TRUE":
+     # === Begin: Post-processing predictions_1.json ===
+     src_json = os.path.join(snap_dir, "predictions_1.json")
+     if os.path.isfile(src_json):
+         with open(src_json, "r", encoding="utf-8") as f:
+             data = json.load(f)
+         # Remove the 'debug' key if present
+         data.pop("debug", None)
+         # Rename 'public_scores' to 'challenge_scores' if present
+         if "public_scores" in data:
+             data["challenge_scores"] = data.pop("public_scores")
+         # Get Team_Name from submission_info
+         team_name = data.get("submission_info", {}).get(
+             "Team_Name", "unknown_team")
+         team_name_safe = re.sub(r'[^a-zA-Z0-9_\-]', '_', team_name)
+         out_json = os.path.join(os.getcwd(), f"task1_{team_name_safe}.json")
+         with open(out_json, "w", encoding="utf-8") as f:
+             json.dump(data, f, ensure_ascii=False, indent=2)
+         print(f"✅ Copied and processed predictions to: {out_json}")
+     else:
+         print("❌ predictions_1.json not found in snapshot directory!")
+     # === End: Post-processing predictions_1.json ===
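The challenge patch above hinges on a single regex rewrite of the participant's load_dataset call. A standalone sketch of that substitution, reusing the script's own pattern and new_line on the line the sample template ships with:

import re

pattern = r'val_dataset\s*=\s*load_dataset\(\s*["\']SimulaMet/Kvasir-VQA-test["\']\s*,\s*split\s*=\s*["\']validation["\']\s*\)'
new_line = 'val_dataset = load_dataset("SimulaMet/Kvasir-VQA-private", split="test")'
sample = 'val_dataset = load_dataset("SimulaMet/Kvasir-VQA-test", split="validation")'
print(re.sub(pattern, new_line, sample))
# -> val_dataset = load_dataset("SimulaMet/Kvasir-VQA-private", split="test")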
medvqa/competitions/medico-2025/task_2.py ADDED
@@ -0,0 +1,186 @@
+ from gradio_client import Client, handle_file
+ from huggingface_hub import snapshot_download, login, whoami
+ import argparse
+ import os
+ import sys
+ import subprocess as sp
+ import time
+ from datetime import datetime, timezone
+ import shutil
+ import json
+ from huggingface_hub import HfApi, grant_access
+ import re
+
+ HF_GATE_ACESSLIST = ["SushantGautam",
+                      "stevenah", "vlbthambawita"]
+
+ MEDVQA_SUBMIT = True if os.environ.get(
+     '_MEDVQA_SUBMIT_FLAG_', 'FALSE') == 'TRUE' else False
+ parser = argparse.ArgumentParser(
+     description='Run GI-2025 Task 2 (Image Generation)')
+ parser.add_argument('--repo_id', type=str, required=True,
+                     help='Path to the HF submission repository')
+ args, _ = parser.parse_known_args()
+
+ os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+ submission_file = "submission_task2.py"
+ file_from_validation = "predictions_2.json"
+
+ min_library = ["datasets>=3.4.1", "transformers", "evaluate", "scipy", "scikit-learn", "diffusers", "peft",
+                "rouge_score", 'tqdm', "gradio_client>=1.8.0"]
+
+ print("🌟 ImageCLEFmed-MEDVQA-GI-2025 🌟",
+       "https://github.com/simula/ImageCLEFmed-MEDVQA-GI-2025")
+ print("🔍 Subtask 2: Creation of High-Fidelity Synthetic GI Images")
+ print(f"👀 Analyzing submission repository: {args.repo_id} 👀")
+
+ try:
+     print(f"Logged in to HuggingFace as: {whoami()['name']}")
+ except Exception:
+     print("⚠️⚠️ Not logged in to HuggingFace! Please get your login token from https://huggingface.co/settings/tokens 🌐")
+     login()
+
+ client = Client("SimulaMet/medvqa")
+ print("💓 Communicating with the Submission Server: Ping!")
+ result = client.predict(
+     api_name="/refresh_page"
+ )
+ print(result)
+
+
+ hf_username = whoami()['name']
+ assert len(hf_username) > 0, "🚫 HuggingFace login failed for some reason"
+ current_timestamp = int(time.time())
+
+ snap_dir = snapshot_download(
+     repo_id=args.repo_id, allow_patterns=[submission_file, "requirements.txt"])
+
+ if not os.path.isfile(os.path.join(snap_dir, submission_file)):
+     raise FileNotFoundError(
+         f"Submission file '{submission_file}' not found in the repository!")
+
+ if os.path.isfile(os.path.join(snap_dir, file_from_validation)):
+     os.remove(os.path.join(snap_dir, file_from_validation))
+
+ print("📦 Making sure the minimum requirements to run the script are installed 📦")
+ sp.run(["python", "-m", "pip", "install", "-q"] + min_library, check=True)
+
+ if os.path.isfile(os.path.join(snap_dir, "requirements.txt")):
+     print(
+         f"📦 Installing requirements from the submission repo: {args.repo_id}/requirements.txt")
+     sp.run(["python", "-m", "pip", "install", "-q", "-r",
+             f"{snap_dir}/requirements.txt"], cwd=snap_dir, check=True)
+
+ if os.environ.get("_MEDVQA_CHALLENGE_EVALUATE_FLAG_", "FALSE") == "TRUE":
+     # Patch the submission file for challenge evaluation
+     challenge_file = submission_file.replace(".py", "_challenge.py")
+     submission_path = os.path.join(snap_dir, submission_file)
+     challenge_path = os.path.join(snap_dir, challenge_file)
+     with open(submission_path, "r", encoding="utf-8") as f:
+         code = f.read()
+     # Replace only the dataset string
+     if "SimulaMet/Kvasir-VQA-test" in code:
+         code = code.replace("SimulaMet/Kvasir-VQA-test",
+                             "SimulaMet/Kvasir-VQA-private")
+         code = code.replace('"validation"', '"test"')
+         # Comment out specific lines
+         lines = code.splitlines()
+         for i, line in enumerate(lines):
+             if ("huggingface.co/datasets" in line or
+                     re.search(r'^\s*prompt_to_real\s*=', line) or
+                     re.search(r'^\s*jsons__\s*=', line)):
+                 if not line.lstrip().startswith("#"):
+                     leading_ws = len(line) - len(line.lstrip())
+                     lines[i] = line[:leading_ws] + "# " + line[leading_ws:]
+         # Insert a new code block after 'import requests'
+         for i, line in enumerate(lines):
+             if "import requests" in line:
+                 insert_idx = i + 1
+                 break
+         else:
+             insert_idx = None
+         new_block = [
+             'from huggingface_hub import hf_hub_download',
+             'prompt_to_real = json.load(open(hf_hub_download("SimulaMet/Kvasir-VQA-private", "real_mapping", repo_type="dataset")))',
+             'jsons__ = json.load(open(hf_hub_download("SimulaMet/Kvasir-VQA-private", "imagen-test", repo_type="dataset")))',
+         ]
+         if insert_idx is not None:
+             lines[insert_idx:insert_idx] = new_block
+         code = "\n".join(lines)
+         with open(challenge_path, "w", encoding="utf-8") as f:
+             f.write(code)
+         submission_file = challenge_file
+         print(f"🔄 Challenge file created at: {challenge_path}")
+     else:
+         print(
+             "⚠️ Challenge patch not applied: expected string not found in submission file.")
+         sys.exit(
+             "Please check the submission file for compatibility with challenge evaluation.")
+
+ print("🔍 Starting your script and loading submission details...")
+ sp.run(["python", f"{snap_dir}/{submission_file}"],
+        cwd=snap_dir, check=True)
+ print(
+     f"🎉 The submission script ran successfully; the intermediate files are at {snap_dir}")
+
+ if not MEDVQA_SUBMIT:
+     print("\nYou can now run the 'medvqa validate_and_submit ...' command to submit the task.")
+ else:
+     print("🚀 Preparing for submission 🚀")
+     file_path_to_upload = os.path.join(
+         snap_dir, f"{hf_username}-_-_-{current_timestamp}-_-_-task2.json")
+     shutil.copy(os.path.join(snap_dir, file_from_validation),
+                 file_path_to_upload)
+     # Add repo_id to the submission file
+     with open(file_path_to_upload, 'r', encoding='utf-8') as f:
+         data = json.load(f)
+     data['repo_id'] = args.repo_id
+     with open(file_path_to_upload, 'w', encoding='utf-8') as f:
+         json.dump(data, f, ensure_ascii=False)
+     api = HfApi()
+     api.update_repo_visibility(args.repo_id, private=False)  # Make public
+     api.update_repo_settings(
+         args.repo_id, gated='manual')  # Enable gated access
+     for user in HF_GATE_ACESSLIST:
+         try:
+             grant_access(args.repo_id, user)  # Grant access to organizers
+         except Exception as e:
+             print(user, ":", e)
+     print(
+         f'''✅ The {args.repo_id} model is now public but gated, and is shared with the organizers.
+         You should not make the model private or remove/update it until the competition results are announced.
+         Feel free to re-submit the task if you change the model on the repository.
+         We will notify you if there are any issues with the submission.
+         ''')
+
+     result = client.predict(
+         file=handle_file(file_path_to_upload),
+         api_name="/add_submission"
+     )
+     print({"User": hf_username, "Task": "task2",
+            "Submitted_time": str(datetime.fromtimestamp(int(current_timestamp), tz=timezone.utc)) + " UTC"
+            })
+     print(result)
+     print("Visit this URL to see the entry: 👇")
+     Client("SimulaMet/medvqa")  # Prints the portal URL on connection
+
+ if os.environ.get("_MEDVQA_CHALLENGE_EVALUATE_FLAG_", "FALSE") == "TRUE":
+     # === Begin: Post-processing predictions_2.json ===
+     src_json = os.path.join(snap_dir, "predictions_2.json")
+     if os.path.isfile(src_json):
+         with open(src_json, "r", encoding="utf-8") as f:
+             data = json.load(f)
+         # Remove the 'debug' key if present
+         data.pop("debug", None)
+         # Rename 'public_scores' to 'challenge_scores' if present
+         if "public_scores" in data:
+             data["challenge_scores"] = data.pop("public_scores")
+         # Get Team_Name from submission_info
+         team_name = data.get("submission_info", {}).get(
+             "Team_Name", "unknown_team")
+         team_name_safe = re.sub(r'[^a-zA-Z0-9_\-]', '_', team_name)
+         out_json = os.path.join(os.getcwd(), f"task2_{team_name_safe}.json")
+         with open(out_json, "w", encoding="utf-8") as f:
+             json.dump(data, f, ensure_ascii=False, indent=2)
+         print(f"✅ Copied and processed predictions to: {out_json}")
+     else:
+         print("❌ predictions_2.json not found in snapshot directory!")
+     # === End: Post-processing predictions_2.json ===
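Task 2's patch is line-oriented rather than a single substitution: it comments out the fixture lines and splices a private-dataset block in right after 'import requests'. A toy reproduction of that transformation (the input snippet here is made up):

import re

code = 'import requests\nprompt_to_real = json.load(open("local.json"))'
lines = code.splitlines()
for i, line in enumerate(lines):  # comment out fixture lines, preserving indentation
    if re.search(r'^\s*prompt_to_real\s*=', line) and not line.lstrip().startswith("#"):
        ws = len(line) - len(line.lstrip())
        lines[i] = line[:ws] + "# " + line[ws:]
idx = next(i + 1 for i, l in enumerate(lines) if "import requests" in l)
lines[idx:idx] = ['from huggingface_hub import hf_hub_download']
print("\n".join(lines))
# import requests
# from huggingface_hub import hf_hub_download
# # prompt_to_real = json.load(open("local.json"))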
medvqa/submission_samples/medico-2025/submission_task1.py ADDED
@@ -0,0 +1,148 @@
+ from transformers import AutoModelForCausalLM
+ from datasets import load_dataset
+ from transformers import AutoProcessor
+ import torch
+ import json
+ import time
+ from tqdm import tqdm
+ import subprocess
+ import platform
+ import sys
+
+ from evaluate import load
+
+ bleu = load("bleu")
+ rouge = load("rouge")
+ meteor = load("meteor")
+
+
+ val_dataset = load_dataset("SimulaMet/Kvasir-VQA-test", split="validation")
+ predictions = []  # List to store predictions
+
+ gpu_name = torch.cuda.get_device_name(
+     0) if torch.cuda.is_available() else "cpu"
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+ def get_mem(): return torch.cuda.memory_allocated(device) / \
+     (1024 ** 2) if torch.cuda.is_available() else 0
+
+
+ initial_mem = get_mem()
+
+ # ✏️✏️-------- EDIT SECTION 1: SUBMISSION DETAILS and MODEL LOADING --------✏️✏️#
+
+ SUBMISSION_INFO = {
+     # 🔹 TODO: PARTICIPANTS MUST ADD PROPER SUBMISSION INFO FOR THE SUBMISSION 🔹
+     # This will be visible to the organizers
+     # DON'T change the keys; only add your info
+     "Participant_Names": "Sushant Gautam, Steven Hicks and Vajira Thambawita",
+     "Affiliations": "SimulaMet",
+     "Contact_emails": ["[email protected]", "[email protected]"],
+     # Only the first email will be used for correspondence
+     "Team_Name": "SimulaMetmedVQA Rangers",
+     "Country": "Norway",
+     "Notes_to_organizers": '''
+         e.g., We have fine-tuned the XXX model
+         This is optional . .
+         Used data augmentations . .
+         Custom info about the model . .
+         Any insights . .
+         + Any informal things you would like to share about this submission.
+     '''
+ }
+ # 🔹 TODO: PARTICIPANTS MUST LOAD THEIR MODEL HERE, EDIT AS NECESSARY FOR YOUR MODEL 🔹
+ # You can add any necessary library imports here
+
+ model_hf = AutoModelForCausalLM.from_pretrained(
+     "SushantGautam/Florence-2-vqa-demo", trust_remote_code=True).to(device)
+ processor = AutoProcessor.from_pretrained(
+     "microsoft/Florence-2-base-ft", trust_remote_code=True)
+
+ model_hf.eval()  # Ensure the model is in evaluation mode
+ # 🏁---------------- END SUBMISSION DETAILS and MODEL LOADING -----------------🏁#
+
+ start_time, post_model_mem = time.time(), get_mem()
+ model_mem_used = round(post_model_mem - initial_mem, 2)
+
+ for idx, ex in enumerate(tqdm(val_dataset, desc="Validating")):
+     question = ex["question"]
+     image = ex["image"].convert(
+         "RGB") if ex["image"].mode != "RGB" else ex["image"]
+     # You have access to the 'question' and 'image' variables for each example
+
+     # ✏️✏️___________ EDIT SECTION 2: ANSWER GENERATION ___________✏️✏️#
+     # 🔹 TODO: PARTICIPANTS CAN MODIFY THIS TOKENIZATION STEP IF NEEDED 🔹
+     inputs = processor(text=[question], images=[image],
+                        return_tensors="pt", padding=True)
+     inputs = {k: v.to(device) for k, v in inputs.items()
+               if k not in ['labels', 'attention_mask']}
+
+     # 🔹 TODO: PARTICIPANTS CAN MODIFY THE GENERATION AND DECODING METHOD HERE 🔹
+     with torch.no_grad():
+         output = model_hf.generate(**inputs)
+     answer = processor.tokenizer.decode(output[0], skip_special_tokens=True)
+     # Make sure the 'answer' variable holds the answer (sentence/word) as a str
+     # 🏁________________ END ANSWER GENERATION ________________🏁#
+
+     # ⛔ DO NOT EDIT any lines below from here; edit only up to the decoding step above as required. ⛔
+     # Ensure the answer is a string
+     assert isinstance(
+         answer, str), f"Generated answer at index {idx} is not a string"
+     # Append the prediction
+     predictions.append(
+         {"index": idx, "img_id": ex["img_id"], "question": ex["question"], "answer": answer})
+
+ # Ensure all predictions match the dataset length
+ assert len(predictions) == len(
+     val_dataset), "Mismatch between predictions and dataset length"
+
+ total_time, final_mem = round(
+     time.time() - start_time, 4), round(get_mem() - post_model_mem, 2)
+
+ # Calculate metrics
+ references = [[e] for e in val_dataset['answer']]
+ preds = [pred['answer'] for pred in predictions]
+
+ bleu_result = bleu.compute(predictions=preds, references=references)
+ rouge_result = rouge.compute(predictions=preds, references=references)
+ meteor_result = meteor.compute(predictions=preds, references=references)
+ bleu_score = round(bleu_result['bleu'], 2)
+ rouge1_score = round(float(rouge_result['rouge1']), 2)
+ rouge2_score = round(float(rouge_result['rouge2']), 2)
+ rougeL_score = round(float(rouge_result['rougeL']), 2)
+ meteor_score = round(float(meteor_result['meteor']), 2)
+
+ public_scores = {
+     'bleu': bleu_score,
+     'rouge1': rouge1_score,
+     'rouge2': rouge2_score,
+     'rougeL': rougeL_score,
+     'meteor': meteor_score
+ }
+ print("✨ Public scores: ", public_scores)
+
+ # Save predictions to a JSON file
+
+ output_data = {"submission_info": SUBMISSION_INFO, "public_scores": public_scores,
+                "predictions": predictions, "total_time": total_time, "time_per_item": total_time / len(val_dataset),
+                "memory_used_mb": final_mem, "model_memory_mb": model_mem_used, "gpu_name": gpu_name,
+                "debug": {
+                    "packages": json.loads(subprocess.check_output([sys.executable, "-m", "pip", "list", "--format=json"])),
+                    "system": {
+                        "python": platform.python_version(),
+                        "os": platform.system(),
+                        "platform": platform.platform(),
+                        "arch": platform.machine()
+                    }}}
+
+
+ with open("predictions_1.json", "w") as f:
+     json.dump(output_data, f, indent=4)
+ print(f"Time: {total_time}s | Mem: {final_mem}MB | Model Load Mem: {model_mem_used}MB | GPU: {gpu_name}")
+ print("✅ Script looks good! Generation process completed successfully. Results saved to 'predictions_1.json'.")
+ print("Next Step:\n 1) Upload this submission_task1.py script file to a HuggingFace model repository.")
+ print('''\n 2) Make a submission to the competition:\n    Run:: medvqa validate_and_submit --competition=medico-2025 --task=1 --repo_id=...''')
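Before uploading, participants can sanity-check the emitted file locally. This illustrative snippet only asserts the top-level keys that output_data above actually writes:

import json

with open("predictions_1.json") as f:
    out = json.load(f)
assert {"submission_info", "public_scores", "predictions"} <= set(out)
print(out["public_scores"], "|", len(out["predictions"]), "predictions")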