# Spaces: Running
# ViDoRe v1 task names (MTEB naming, per the constant's name).
# Kept as a list, in the original order, so callers that index, concatenate
# or iterate it keep seeing the same sequence.
VIDORE_V1_MTEB_NAMES = [
    "VidoreArxivQARetrieval",
    "VidoreDocVQARetrieval",
    "VidoreInfoVQARetrieval",
    "VidoreShiftProjectRetrieval",
    "VidoreSyntheticDocQAAIRetrieval",
    "VidoreSyntheticDocQAEnergyRetrieval",
    "VidoreSyntheticDocQAGovernmentReportsRetrieval",
    "VidoreSyntheticDocQAHealthcareIndustryRetrieval",
    "VidoreTabfquadRetrieval",
    "VidoreTatdqaRetrieval",
]
# ViDoRe v2 task names (MTEB naming, per the constant's name).
# Order is preserved from the original definition.
VIDORE_V2_MTEB_NAMES = [
    "Vidore2BioMedicalLecturesRetrieval",
    "Vidore2EconomicsReportsRetrieval",
    "Vidore2ESGReportsHLRetrieval",
    "Vidore2ESGReportsRetrieval",
]
# Keywords for the legacy (pre-MTEB) ViDoRe v1 dataset identifiers.
# NOTE(review): presumably matched as substrings of a dataset name, the way
# deprecated_get_datasets_nickname does - confirm against the callers.
DEPRECATED_VIDORE_DATASETS_KEYWORDS = [
    "arxivqa",
    "docvqa",
    "infovqa",
    "tabfquad",
    "tatdqa",
    "shift",
    "artificial_intelligence",
    "energy",
    "government_reports",
    "healthcare_industry",
]
# Keywords for the legacy (pre-MTEB) ViDoRe 2 dataset identifiers.
# Both the older spellings ("restaurant_esg", "rse_restaurant",
# "mit_biomedical", "economics_macro") and the newer ones
# ("biomedical_lectures", "esg_reports", "economics_reports") are listed.
# NOTE(review): presumably matched as substrings of a dataset name - confirm
# against the callers.
DEPRECATED_VIDORE_2_DATASETS_KEYWORDS = [
    "restaurant_esg",
    "rse_restaurant",
    "mit_biomedical",
    "economics_macro",
    "biomedical_lectures",
    "esg_reports",
    "economics_reports",
]
# Exact task name -> display nickname. One entry per supported task; lookups
# are exact (case-sensitive, no substring matching).
_MTEB_TASK_NICKNAMES = {
    # ViDoRe v1
    "VidoreArxivQARetrieval": "ArxivQA",
    "VidoreDocVQARetrieval": "DocVQA",
    "VidoreInfoVQARetrieval": "InfoVQA",
    "VidoreTabfquadRetrieval": "TabFQuad",
    "VidoreTatdqaRetrieval": "TAT-DQA",
    "VidoreShiftProjectRetrieval": "Shift Project",
    "VidoreSyntheticDocQAAIRetrieval": "Artificial Intelligence",
    "VidoreSyntheticDocQAEnergyRetrieval": "Energy",
    "VidoreSyntheticDocQAGovernmentReportsRetrieval": "Government Reports",
    "VidoreSyntheticDocQAHealthcareIndustryRetrieval": "Healthcare Industry",
    # ViDoRe v2
    "Vidore2ESGReportsHLRetrieval": "ESG Restaurant Human English",
    "Vidore2ESGReportsRetrieval": "ESG Restaurant Synthetic Multilingual",
    "Vidore2BioMedicalLecturesRetrieval": "MIT Biomedical Multilingual",
    "Vidore2EconomicsReportsRetrieval": "Economics Macro Multilingual",
}


def get_datasets_nickname(dataset_name: str) -> str:
    """Return the display nickname for a ViDoRe task name.

    Args:
        dataset_name: Task name, e.g. ``"VidoreArxivQARetrieval"``. It must
            match one of the known names exactly.

    Returns:
        The nickname string associated with ``dataset_name``.

    Raises:
        ValueError: If ``dataset_name`` is not one of the known task names.
    """
    try:
        return _MTEB_TASK_NICKNAMES[dataset_name]
    except (KeyError, TypeError):
        # TypeError covers unhashable inputs (e.g. a list): they can never
        # equal a known name, so they get the same "not found" error as any
        # other unknown value. `from None` keeps the lookup failure out of
        # the traceback.
        raise ValueError(f"Dataset {dataset_name} not found in ViDoRe") from None
# Legacy ViDoRe v1 rules: (substring, nickname). Checked in this order; the
# first substring found in the dataset name wins.
_DEPRECATED_V1_KEYWORD_NICKNAMES = (
    ("arxivqa", "ArxivQA"),
    ("docvqa", "DocVQA"),
    ("infovqa", "InfoVQA"),
    ("tabfquad", "TabFQuad"),
    ("tatdqa", "TAT-DQA"),
    ("shift", "Shift Project"),
    ("artificial_intelligence", "Artificial Intelligence"),
    ("energy", "Energy"),
    ("government_reports", "Government Reports"),
    ("healthcare_industry", "Healthcare Industry"),
)

# Legacy ViDoRe 2 families:
# (old-style keyword, new-style keyword, multilingual nickname, plain nickname).
# Checked in this order, after the v1 rules and the "ESG Restaurant Human" rule.
_DEPRECATED_V2_FAMILIES = (
    (
        "rse_restaurant",
        "esg_reports",
        "ESG Restaurant Synthetic Multilingual",
        "ESG Restaurant Synthetic",
    ),
    (
        "mit_biomedical",
        "biomedical_lectures",
        "MIT Biomedical Multilingual",
        "MIT Biomedical",
    ),
    (
        "economics_macro",
        "economics_reports",
        "Economics Macro Multilingual",
        "Economics Macro",
    ),
)


def deprecated_get_datasets_nickname(dataset_name: str) -> str:
    """Return the display nickname for a legacy ViDoRe dataset identifier.

    Matching is by case-sensitive substring, with this precedence:

    1. The ViDoRe v1 keywords, in the order of
       ``_DEPRECATED_V1_KEYWORD_NICKNAMES``.
    2. ``"restaurant_esg"`` or ``"esg_reports_human"`` ->
       ``"ESG Restaurant Human"``.
    3. The ViDoRe 2 families (ESG, biomedical, economics), in that order.
       A name is multilingual when it uses the old-style keyword together
       with ``"multilingual"``, or the new-style keyword without ``"_eng_"``.
       Otherwise the plain nickname is returned.

    Args:
        dataset_name: Dataset identifier to classify.

    Returns:
        The nickname of the first rule that matches.

    Raises:
        ValueError: If no rule matches ``dataset_name``.
    """
    for keyword, nickname in _DEPRECATED_V1_KEYWORD_NICKNAMES:
        if keyword in dataset_name:
            return nickname

    # Must run before the ESG family below: "esg_reports_human" also contains
    # the family's new-style keyword "esg_reports".
    if "restaurant_esg" in dataset_name or "esg_reports_human" in dataset_name:
        return "ESG Restaurant Human"

    has_multilingual_tag = "multilingual" in dataset_name
    has_english_tag = "_eng_" in dataset_name

    for old_keyword, new_keyword, multilingual_name, plain_name in _DEPRECATED_V2_FAMILIES:
        uses_old = old_keyword in dataset_name
        uses_new = new_keyword in dataset_name
        if not (uses_old or uses_new):
            continue
        # Old naming marks multilingual sets explicitly; new naming marks the
        # English-only sets instead and treats everything else as multilingual.
        if (uses_old and has_multilingual_tag) or (uses_new and not has_english_tag):
            return multilingual_name
        return plain_name

    raise ValueError(f"Dataset {dataset_name} not found in ViDoRe")