Quentin Mace committed on
Commit
7e8e535
·
1 Parent(s): 00cedc6

handle_new_names

Browse files
Files changed (1) hide show
  1. data/dataset_handler.py +18 -7
data/dataset_handler.py CHANGED
@@ -34,8 +34,12 @@ DEPRECATED_VIDORE_2_DATASETS_KEYWORDS = [
34
  "rse_restaurant",
35
  "mit_biomedical",
36
  "economics_macro",
 
 
 
37
  ]
38
 
 
39
  def get_datasets_nickname(dataset_name) -> str:
40
  if dataset_name == "VidoreArxivQARetrieval":
41
  return "ArxivQA"
@@ -70,6 +74,7 @@ def get_datasets_nickname(dataset_name) -> str:
70
  else:
71
  raise ValueError(f"Dataset {dataset_name} not found in ViDoRe")
72
 
 
73
  def deprecated_get_datasets_nickname(dataset_name) -> str:
74
  if "arxivqa" in dataset_name:
75
  return "ArxivQA"
@@ -99,25 +104,31 @@ def deprecated_get_datasets_nickname(dataset_name) -> str:
99
  elif "healthcare_industry" in dataset_name:
100
  return "Healthcare Industry"
101
 
102
- elif "restaurant_esg" in dataset_name:
103
  return "ESG Restaurant Human"
104
 
105
- elif "rse_restaurant" in dataset_name and "multilingual" in dataset_name:
 
 
106
  return "ESG Restaurant Synthetic Multilingual"
107
 
108
- elif "rse_restaurant" in dataset_name:
109
  return "ESG Restaurant Synthetic"
110
 
111
- elif "mit_biomedical" in dataset_name and "multilingual" in dataset_name:
 
 
112
  return "MIT Biomedical Multilingual"
113
 
114
- elif "mit_biomedical" in dataset_name:
115
  return "MIT Biomedical"
116
 
117
- elif "economics_macro" in dataset_name and "multilingual" in dataset_name:
 
 
118
  return "Economics Macro Multilingual"
119
 
120
- elif "economics_macro" in dataset_name:
121
  return "Economics Macro"
122
 
123
  else:
 
34
  "rse_restaurant",
35
  "mit_biomedical",
36
  "economics_macro",
37
+ "biomedical_lectures",
38
+ "esg_reports",
39
+ "economics_reports",
40
  ]
41
 
42
+
43
  def get_datasets_nickname(dataset_name) -> str:
44
  if dataset_name == "VidoreArxivQARetrieval":
45
  return "ArxivQA"
 
74
  else:
75
  raise ValueError(f"Dataset {dataset_name} not found in ViDoRe")
76
 
77
+
78
  def deprecated_get_datasets_nickname(dataset_name) -> str:
79
  if "arxivqa" in dataset_name:
80
  return "ArxivQA"
 
104
  elif "healthcare_industry" in dataset_name:
105
  return "Healthcare Industry"
106
 
107
+ elif ("restaurant_esg" in dataset_name) or ("esg_reports_human" in dataset_name):
108
  return "ESG Restaurant Human"
109
 
110
+ elif ("rse_restaurant" in dataset_name and "multilingual" in dataset_name) or (
111
+ "esg_reports" in dataset_name and not "_eng_" in dataset_name
112
+ ):
113
  return "ESG Restaurant Synthetic Multilingual"
114
 
115
+ elif ("rse_restaurant" in dataset_name) or ("esg_reports" in dataset_name and "_eng_" in dataset_name):
116
  return "ESG Restaurant Synthetic"
117
 
118
+ elif ("mit_biomedical" in dataset_name and "multilingual" in dataset_name) or (
119
+ "biomedical_lectures" in dataset_name and not "_eng_" in dataset_name
120
+ ):
121
  return "MIT Biomedical Multilingual"
122
 
123
+ elif ("mit_biomedical" in dataset_name) or ("biomedical_lectures" in dataset_name and "_eng_" in dataset_name):
124
  return "MIT Biomedical"
125
 
126
+ elif ("economics_macro" in dataset_name and "multilingual" in dataset_name) or (
127
+ "economics_reports" in dataset_name and not "_eng_" in dataset_name
128
+ ):
129
  return "Economics Macro Multilingual"
130
 
131
+ elif ("economics_macro" in dataset_name) or ("economics_reports" in dataset_name and "_eng_" in dataset_name):
132
  return "Economics Macro"
133
 
134
  else: