| import os | |
| import json | |
| from typing import Dict, List | |
| def load_eurorad_dataset( | |
| dataset_path: str, | |
| section: str = "any", | |
| as_dict: bool = False, | |
| filter_by_caption: List[str] = [ | |
| "xray", | |
| "x-ray", | |
| "x ray", | |
| "ray", | |
| "xr", | |
| "radiograph", | |
| "radiogram", | |
| "plain film", | |
| ], | |
| ) -> List[Dict] | Dict[str, Dict]: | |
| """ | |
| Load a dataset from a JSON file. | |
| Args: | |
| dataset_path (str): Path to the JSON dataset file. | |
| section (str, optional): Section of the dataset to load. Defaults to "any". | |
| as_dict (bool, optional): Whether to return data as dict. Defaults to False. | |
| filter_by_caption (List[str], optional): List of strings to filter cases by caption content. Defaults to []. | |
| Returns: | |
| List[Dict] | Dict[str, Dict]: The loaded dataset as a list of dictionaries or dict if as_dict=True. | |
| Raises: | |
| FileNotFoundError: If dataset_path does not exist | |
| json.JSONDecodeError: If file is not valid JSON | |
| """ | |
| with open(dataset_path, "r", encoding="utf-8") as file: | |
| data = json.load(file) | |
| if filter_by_caption: | |
| filtered_data = {} | |
| for case_id, case in data.items(): | |
| if any( | |
| any(x in subfig["caption"].lower() for x in filter_by_caption) | |
| for figure in case["figures"] | |
| for subfig in figure["subfigures"] | |
| ) or any(x in case["image_finding"].lower() for x in filter_by_caption): | |
| filtered_data[case_id] = case | |
| data = filtered_data | |
| if section != "any": | |
| section = section.strip().lower() | |
| if not as_dict: | |
| data = [ | |
| item for item in data.values() if item.get("section", "").strip().lower() == section | |
| ] | |
| else: | |
| data = { | |
| k: v for k, v in data.items() if v.get("section", "").strip().lower() == section | |
| } | |
| elif not as_dict: | |
| data = list(data.values()) | |
| return data | |
| def save_dataset(dataset: Dict | List[Dict], dataset_path: str): | |
| """ | |
| Save a dataset to a JSON file. | |
| Args: | |
| dataset (Dict | List[Dict]): The dataset to save as a dictionary or list of dictionaries. | |
| dataset_path (str): Path where the JSON dataset file will be saved. | |
| """ | |
| with open(dataset_path, "w", encoding="utf-8") as file: | |
| json.dump(dataset, file) | |