# Spaces: Running  (page-header text captured along with the source; not part of the module)
# Names of the ten ViDoRe v1 retrieval datasets, spelled exactly as
# get_datasets_nickname() expects them.
VIDORE_V1_MTEB_NAMES = [
    "VidoreArxivQARetrieval",
    "VidoreDocVQARetrieval",
    "VidoreInfoVQARetrieval",
    "VidoreShiftProjectRetrieval",
    "VidoreSyntheticDocQAAIRetrieval",
    "VidoreSyntheticDocQAEnergyRetrieval",
    "VidoreSyntheticDocQAGovernmentReportsRetrieval",
    "VidoreSyntheticDocQAHealthcareIndustryRetrieval",
    "VidoreTabfquadRetrieval",
    "VidoreTatdqaRetrieval",
]
# Names of the four ViDoRe v2 retrieval datasets, spelled exactly as
# get_datasets_nickname() expects them.
VIDORE_V2_MTEB_NAMES = [
    "Vidore2BioMedicalLecturesRetrieval",
    "Vidore2EconomicsReportsRetrieval",
    "Vidore2ESGReportsHLRetrieval",
    "Vidore2ESGReportsRetrieval",
]
# Substrings that identify a ViDoRe v1 dataset in the older naming scheme.
# They are the same keywords, in the same order, that
# deprecated_get_datasets_nickname() tests with `in`.
DEPRECATED_VIDORE_DATASETS_KEYWORDS = [
    "arxivqa",
    "docvqa",
    "infovqa",
    "tabfquad",
    "tatdqa",
    "shift",
    "artificial_intelligence",
    "energy",
    "government_reports",
    "healthcare_industry",
]
# Substrings that identify a ViDoRe v2 dataset in the older naming schemes.
# Each one is a keyword that deprecated_get_datasets_nickname() tests with `in`.
DEPRECATED_VIDORE_2_DATASETS_KEYWORDS = [
    "restaurant_esg",
    "rse_restaurant",
    "mit_biomedical",
    "economics_macro",
    "biomedical_lectures",
    "esg_reports",
    "economics_reports",
]
# Exact dataset name -> display nickname (ViDoRe v1 entries first, then v2).
_MTEB_NAME_TO_NICKNAME = {
    "VidoreArxivQARetrieval": "ArxivQA",
    "VidoreDocVQARetrieval": "DocVQA",
    "VidoreInfoVQARetrieval": "InfoVQA",
    "VidoreTabfquadRetrieval": "TabFQuad",
    "VidoreTatdqaRetrieval": "TAT-DQA",
    "VidoreShiftProjectRetrieval": "Shift Project",
    "VidoreSyntheticDocQAAIRetrieval": "Artificial Intelligence",
    "VidoreSyntheticDocQAEnergyRetrieval": "Energy",
    "VidoreSyntheticDocQAGovernmentReportsRetrieval": "Government Reports",
    "VidoreSyntheticDocQAHealthcareIndustryRetrieval": "Healthcare Industry",
    "Vidore2ESGReportsHLRetrieval": "ESG Restaurant Human English",
    "Vidore2ESGReportsRetrieval": "ESG Restaurant Synthetic Multilingual",
    "Vidore2BioMedicalLecturesRetrieval": "MIT Biomedical Multilingual",
    "Vidore2EconomicsReportsRetrieval": "Economics Macro Multilingual",
}


def get_datasets_nickname(dataset_name) -> str:
    """Return the display nickname for an exact ViDoRe dataset name.

    The match is exact and case-sensitive; no substring matching is done.

    Args:
        dataset_name: Dataset name such as ``"VidoreDocVQARetrieval"`` or
            ``"Vidore2ESGReportsRetrieval"``.

    Returns:
        The human-readable nickname associated with ``dataset_name``.

    Raises:
        ValueError: If ``dataset_name`` is not one of the known names.
    """
    try:
        return _MTEB_NAME_TO_NICKNAME[dataset_name]
    except (KeyError, TypeError):
        # TypeError is raised for unhashable arguments (e.g. a list); those
        # are simply "not a known name", so report them as ValueError too.
        raise ValueError(f"Dataset {dataset_name} not found in ViDoRe") from None
# ViDoRe v1: a single keyword identifies each dataset. Order matters because
# the first keyword found in the name wins.
_DEPRECATED_V1_KEYWORD_NICKNAMES = (
    ("arxivqa", "ArxivQA"),
    ("docvqa", "DocVQA"),
    ("infovqa", "InfoVQA"),
    ("tabfquad", "TabFQuad"),
    ("tatdqa", "TAT-DQA"),
    ("shift", "Shift Project"),
    ("artificial_intelligence", "Artificial Intelligence"),
    ("energy", "Energy"),
    ("government_reports", "Government Reports"),
    ("healthcare_industry", "Healthcare Industry"),
)

# ViDoRe v2 families, as
# (tagged_keyword, eng_keyword, nickname, multilingual_nickname):
#   * names containing ``tagged_keyword`` are multilingual only when they also
#     contain "multilingual";
#   * names containing ``eng_keyword`` are multilingual unless they contain
#     "_eng_".
_DEPRECATED_V2_FAMILIES = (
    (
        "rse_restaurant",
        "esg_reports",
        "ESG Restaurant Synthetic",
        "ESG Restaurant Synthetic Multilingual",
    ),
    (
        "mit_biomedical",
        "biomedical_lectures",
        "MIT Biomedical",
        "MIT Biomedical Multilingual",
    ),
    (
        "economics_macro",
        "economics_reports",
        "Economics Macro",
        "Economics Macro Multilingual",
    ),
)


def deprecated_get_datasets_nickname(dataset_name) -> str:
    """Return the display nickname for a dataset named in the older schemes.

    Matching is by case-sensitive substring, and rules are tried in a fixed
    order: the ViDoRe v1 keywords, then the human-labelled ESG dataset, then
    the three v2 families (ESG, biomedical, economics). The first rule that
    matches decides the result.

    Args:
        dataset_name: Dataset identifier to classify, e.g.
            ``"vidore/docvqa_test_subsampled"``.

    Returns:
        The human-readable nickname of the matching dataset.

    Raises:
        ValueError: If no rule matches ``dataset_name``.
    """
    for keyword, nickname in _DEPRECATED_V1_KEYWORD_NICKNAMES:
        if keyword in dataset_name:
            return nickname

    # Must precede the ESG family below: "restaurant_esg_reports..." and
    # "esg_reports_human..." both contain the family keyword "esg_reports".
    if "restaurant_esg" in dataset_name or "esg_reports_human" in dataset_name:
        return "ESG Restaurant Human"

    for tagged_keyword, eng_keyword, nickname, multilingual_nickname in _DEPRECATED_V2_FAMILIES:
        has_tagged = tagged_keyword in dataset_name
        has_eng_style = eng_keyword in dataset_name
        if (has_tagged and "multilingual" in dataset_name) or (
            has_eng_style and "_eng_" not in dataset_name
        ):
            return multilingual_nickname
        if has_tagged or (has_eng_style and "_eng_" in dataset_name):
            return nickname

    raise ValueError(f"Dataset {dataset_name} not found in ViDoRe")