Spaces:
Running
Running
map_new_names (#10)
Browse files
- handle_new_names (7e8e535c0cf34b319162b659dd1adb17474e54b5)
- data/dataset_handler.py +18 -7
data/dataset_handler.py
CHANGED
@@ -34,8 +34,12 @@ DEPRECATED_VIDORE_2_DATASETS_KEYWORDS = [
|
|
34 |
"rse_restaurant",
|
35 |
"mit_biomedical",
|
36 |
"economics_macro",
|
|
|
|
|
|
|
37 |
]
|
38 |
|
|
|
39 |
def get_datasets_nickname(dataset_name) -> str:
|
40 |
if dataset_name == "VidoreArxivQARetrieval":
|
41 |
return "ArxivQA"
|
@@ -70,6 +74,7 @@ def get_datasets_nickname(dataset_name) -> str:
|
|
70 |
else:
|
71 |
raise ValueError(f"Dataset {dataset_name} not found in ViDoRe")
|
72 |
|
|
|
73 |
def deprecated_get_datasets_nickname(dataset_name) -> str:
|
74 |
if "arxivqa" in dataset_name:
|
75 |
return "ArxivQA"
|
@@ -99,25 +104,31 @@ def deprecated_get_datasets_nickname(dataset_name) -> str:
|
|
99 |
elif "healthcare_industry" in dataset_name:
|
100 |
return "Healthcare Industry"
|
101 |
|
102 |
-
elif "restaurant_esg" in dataset_name:
|
103 |
return "ESG Restaurant Human"
|
104 |
|
105 |
-
elif "rse_restaurant" in dataset_name and "multilingual" in dataset_name
|
|
|
|
|
106 |
return "ESG Restaurant Synthetic Multilingual"
|
107 |
|
108 |
-
elif "rse_restaurant" in dataset_name:
|
109 |
return "ESG Restaurant Synthetic"
|
110 |
|
111 |
-
elif "mit_biomedical" in dataset_name and "multilingual" in dataset_name
|
|
|
|
|
112 |
return "MIT Biomedical Multilingual"
|
113 |
|
114 |
-
elif "mit_biomedical" in dataset_name:
|
115 |
return "MIT Biomedical"
|
116 |
|
117 |
-
elif "economics_macro" in dataset_name and "multilingual" in dataset_name
|
|
|
|
|
118 |
return "Economics Macro Multilingual"
|
119 |
|
120 |
-
elif "economics_macro" in dataset_name:
|
121 |
return "Economics Macro"
|
122 |
|
123 |
else:
|
|
|
34 |
"rse_restaurant",
|
35 |
"mit_biomedical",
|
36 |
"economics_macro",
|
37 |
+
"biomedical_lectures",
|
38 |
+
"esg_reports",
|
39 |
+
"economics_reports",
|
40 |
]
|
41 |
|
42 |
+
|
43 |
def get_datasets_nickname(dataset_name) -> str:
|
44 |
if dataset_name == "VidoreArxivQARetrieval":
|
45 |
return "ArxivQA"
|
|
|
74 |
else:
|
75 |
raise ValueError(f"Dataset {dataset_name} not found in ViDoRe")
|
76 |
|
77 |
+
|
78 |
def deprecated_get_datasets_nickname(dataset_name) -> str:
|
79 |
if "arxivqa" in dataset_name:
|
80 |
return "ArxivQA"
|
|
|
104 |
elif "healthcare_industry" in dataset_name:
|
105 |
return "Healthcare Industry"
|
106 |
|
107 |
+
elif ("restaurant_esg" in dataset_name) or ("esg_reports_human" in dataset_name):
|
108 |
return "ESG Restaurant Human"
|
109 |
|
110 |
+
elif ("rse_restaurant" in dataset_name and "multilingual" in dataset_name) or (
|
111 |
+
"esg_reports" in dataset_name and not "_eng_" in dataset_name
|
112 |
+
):
|
113 |
return "ESG Restaurant Synthetic Multilingual"
|
114 |
|
115 |
+
elif ("rse_restaurant" in dataset_name) or ("esg_reports" in dataset_name and "_eng_" in dataset_name):
|
116 |
return "ESG Restaurant Synthetic"
|
117 |
|
118 |
+
elif ("mit_biomedical" in dataset_name and "multilingual" in dataset_name) or (
|
119 |
+
"biomedical_lectures" in dataset_name and not "_eng_" in dataset_name
|
120 |
+
):
|
121 |
return "MIT Biomedical Multilingual"
|
122 |
|
123 |
+
elif ("mit_biomedical" in dataset_name) or ("biomedical_lectures" in dataset_name and "_eng_" in dataset_name):
|
124 |
return "MIT Biomedical"
|
125 |
|
126 |
+
elif ("economics_macro" in dataset_name and "multilingual" in dataset_name) or (
|
127 |
+
"economics_reports" in dataset_name and not "_eng_" in dataset_name
|
128 |
+
):
|
129 |
return "Economics Macro Multilingual"
|
130 |
|
131 |
+
elif ("economics_macro" in dataset_name) or ("economics_reports" in dataset_name and "_eng_" in dataset_name):
|
132 |
return "Economics Macro"
|
133 |
|
134 |
else:
|