import gradio as gr from app.utils import add_rank_and_format, filter_models, get_refresh_function, deprecated_get_refresh_function from data.deprecated_model_handler import DeprecatedModelHandler from data.model_handler import ModelHandler METRICS = [ "ndcg_at_1", "ndcg_at_5", "ndcg_at_10", "ndcg_at_100", "recall_at_1", "recall_at_5", "recall_at_10", "recall_at_100", ] def main(): # Get new results model_handler = ModelHandler() initial_metric = "ndcg_at_5" model_handler.get_vidore_data(initial_metric) data_benchmark_1 = model_handler.render_df(initial_metric, benchmark_version=1) data_benchmark_1 = add_rank_and_format(data_benchmark_1, benchmark_version=1) data_benchmark_2 = model_handler.render_df(initial_metric, benchmark_version=2) data_benchmark_2 = add_rank_and_format(data_benchmark_2, benchmark_version=2) num_datasets_1 = len(data_benchmark_1.columns) - 3 num_scores_1 = len(data_benchmark_1) * num_datasets_1 num_models_1 = len(data_benchmark_1) num_datasets_2 = len(data_benchmark_2.columns) - 3 num_scores_2 = len(data_benchmark_2) * num_datasets_2 num_models_2 = len(data_benchmark_2) # Get deprecated results deprecated_model_handler = DeprecatedModelHandler() initial_metric = "ndcg_at_5" deprecated_model_handler.get_vidore_data(initial_metric) deprecated_data_benchmark_1 = deprecated_model_handler.render_df(initial_metric, benchmark_version=1) deprecated_data_benchmark_1 = add_rank_and_format(deprecated_data_benchmark_1, benchmark_version=1) deprecated_data_benchmark_2 = deprecated_model_handler.render_df(initial_metric, benchmark_version=2) deprecated_data_benchmark_2 = add_rank_and_format(deprecated_data_benchmark_2, benchmark_version=2) deprecated_num_datasets_1 = len(deprecated_data_benchmark_1.columns) - 3 deprecated_num_scores_1 = len(deprecated_data_benchmark_1) * deprecated_num_datasets_1 deprecated_num_models_1 = len(deprecated_data_benchmark_1) deprecated_num_datasets_2 = len(deprecated_data_benchmark_2.columns) - 3 deprecated_num_scores_2 = len(deprecated_data_benchmark_2) * deprecated_num_datasets_2 deprecated_num_models_2 = len(deprecated_data_benchmark_2) css = """ table > thead { white-space: normal } table { --cell-width-1: 250px } table > tbody > tr > td:nth-child(2) > div { overflow-x: auto } .filter-checkbox-group { max-width: max-content; } #markdown size .markdown { font-size: 1rem; } """ with gr.Blocks(css=css) as block: with gr.Tabs(): with gr.TabItem("ViDoRe V1"): gr.Markdown("# ViDoRe: The Visual Document Retrieval Benchmark 1 📚🔍") gr.Markdown("### From the paper - ColPali: Efficient Document Retrieval with Vision Language Models 👀") gr.Markdown( """ Visual Document Retrieval Benchmark 1 leaderboard. To submit results, refer to the corresponding tab. Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics, tasks and models. """ ) datasets_columns_1 = list(data_benchmark_1.columns[4:]) with gr.Row(): metric_dropdown_1 = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric") research_textbox_1 = gr.Textbox( placeholder="🔍 Search Models... [press enter]", label="Filter Models by Name", ) column_checkboxes_1 = gr.CheckboxGroup( choices=datasets_columns_1, value=datasets_columns_1, label="Select Columns to Display" ) with gr.Row(): datatype_1 = ["number", "markdown"] + ["number"] * (num_datasets_1 + 1) dataframe_1 = gr.Dataframe(data_benchmark_1, datatype=datatype_1, type="pandas") def update_data_1(metric, search_term, selected_columns): model_handler.get_vidore_data(metric) data = model_handler.render_df(metric, benchmark_version=1) data = add_rank_and_format(data, benchmark_version=1, selected_columns=selected_columns) data = filter_models(data, search_term) if selected_columns: data = data[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + selected_columns] return data with gr.Row(): refresh_button_1 = gr.Button("Refresh") refresh_button_1.click( get_refresh_function(model_handler, benchmark_version=1), inputs=[metric_dropdown_1], outputs=dataframe_1, concurrency_limit=20, ) # Automatically refresh the dataframe when the dropdown value changes metric_dropdown_1.change( get_refresh_function(model_handler, benchmark_version=1), inputs=[metric_dropdown_1], outputs=dataframe_1, ) research_textbox_1.submit( lambda metric, search_term, selected_columns: update_data_1(metric, search_term, selected_columns), inputs=[metric_dropdown_1, research_textbox_1, column_checkboxes_1], outputs=dataframe_1, ) column_checkboxes_1.change( lambda metric, search_term, selected_columns: update_data_1(metric, search_term, selected_columns), inputs=[metric_dropdown_1, research_textbox_1, column_checkboxes_1], outputs=dataframe_1, ) gr.Markdown( f""" - **Total Datasets**: {num_datasets_1} - **Total Scores**: {num_scores_1} - **Total Models**: {num_models_1} """ + r""" Please consider citing: ```bibtex @misc{faysse2024colpaliefficientdocumentretrieval, title={ColPali: Efficient Document Retrieval with Vision Language Models}, author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo}, year={2024}, eprint={2407.01449}, archivePrefix={arXiv}, primaryClass={cs.IR}, url={https://arxiv.org/abs/2407.01449}, } ``` """ ) with gr.TabItem("ViDoRe V2"): gr.Markdown("# ViDoRe V2: A new visual Document Retrieval Benchmark 📚🔍") gr.Markdown("### A harder dataset benchmark for visual document retrieval 👀") gr.Markdown( """ Visual Document Retrieval Benchmark 2 leaderboard. To submit results, refer to the corresponding tab. Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics and models. """ ) datasets_columns_2 = list(data_benchmark_2.columns[4:]) with gr.Row(): metric_dropdown_2 = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric") research_textbox_2 = gr.Textbox( placeholder="🔍 Search Models... [press enter]", label="Filter Models by Name", ) column_checkboxes_2 = gr.CheckboxGroup( choices=datasets_columns_2, value=datasets_columns_2, label="Select Columns to Display" ) with gr.Row(): datatype_2 = ["number", "markdown"] + ["number"] * (num_datasets_2 + 1) dataframe_2 = gr.Dataframe(data_benchmark_2, datatype=datatype_2, type="pandas") def update_data_2(metric, search_term, selected_columns): model_handler.get_vidore_data(metric) data = model_handler.render_df(metric, benchmark_version=2) data = add_rank_and_format(data, benchmark_version=2, selected_columns=selected_columns) data = filter_models(data, search_term) # data = remove_duplicates(data) # Add this line if selected_columns: data = data[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + selected_columns] return data with gr.Row(): refresh_button_2 = gr.Button("Refresh") refresh_button_2.click( get_refresh_function(model_handler, benchmark_version=2), inputs=[metric_dropdown_2], outputs=dataframe_2, concurrency_limit=20, ) with gr.Row(): gr.Markdown( """ **Note**: For now, all models were evaluated using the vidore-benchmark package and custom retrievers on our side. Those numbers are not numbers obtained from the organisations that released those models. """ ) # Automatically refresh the dataframe when the dropdown value changes metric_dropdown_2.change( get_refresh_function(model_handler, benchmark_version=2), inputs=[metric_dropdown_2], outputs=dataframe_2, ) research_textbox_2.submit( lambda metric, search_term, selected_columns: update_data_2(metric, search_term, selected_columns), inputs=[metric_dropdown_2, research_textbox_2, column_checkboxes_2], outputs=dataframe_2, ) column_checkboxes_2.change( lambda metric, search_term, selected_columns: update_data_2(metric, search_term, selected_columns), inputs=[metric_dropdown_2, research_textbox_2, column_checkboxes_2], outputs=dataframe_2, ) gr.Markdown( f""" - **Total Datasets**: {num_datasets_2} - **Total Scores**: {num_scores_2} - **Total Models**: {num_models_2} """ + r""" Please consider citing: ```bibtex @misc{faysse2024colpaliefficientdocumentretrieval, title={ColPali: Efficient Document Retrieval with Vision Language Models}, author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo}, year={2024}, eprint={2407.01449}, archivePrefix={arXiv}, primaryClass={cs.IR}, url={https://arxiv.org/abs/2407.01449}, } @misc{macé2025vidorebenchmarkv2raising, title={ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval}, author={Quentin Macé and António Loison and Manuel Faysse}, year={2025}, eprint={2505.17166}, archivePrefix={arXiv}, primaryClass={cs.IR}, url={https://arxiv.org/abs/2505.17166}, } ``` """ ) with gr.TabItem("📚 Submit your model"): gr.Markdown("# How to Submit a New Model to the Leaderboard") gr.Markdown( """ To submit a new model to the ViDoRe leaderboard, follow these steps: 1. **Evaluate your model**: - Follow the evaluation procedure provided in the [ViDoRe GitHub repository](https://github.com/illuin-tech/vidore-benchmark/) that uses MTEB. 2. **Format your submission file**: - Add the generated files to [MTEB results](https://github.com/embeddings-benchmark/results) project. Check the [Colpali results](https://github.com/embeddings-benchmark/results/tree/main/results/vidore__colpali-v1.3/1b5c8929330df1a66de441a9b5409a878f0de5b0) for an example. And you're done! Your model will appear on the leaderboard when you click refresh! Once the space gets rebooted, it will appear on startup. Note: For proper hyperlink redirection, please ensure that your model repository name is in kebab-case, e.g. `my-model-name`. """ ) with gr.TabItem("Deprecated ViDoRe V1"): gr.Markdown( "## Deprecation notice: This leaderboard contains the results computed with the " "[vidore-benchmark](https://github.com/illuin-tech/vidore-benchmark) package, " "which is no longer maintained. Results should be computed using the " "[mteb](https://github.com/embeddings-benchmark/mteb) package as described " "[here](https://github.com/illuin-tech/vidore-benchmark/blob/main/README.md)." ) gr.Markdown("## Missing results in the new leaderboard are being added as they are re-computed.") gr.Markdown("# [Deprecated] ViDoRe: The Visual Document Retrieval Benchmark 1 📚🔍") gr.Markdown("### From the paper - ColPali: Efficient Document Retrieval with Vision Language Models 👀") gr.Markdown( """ Visual Document Retrieval Benchmark 1 leaderboard. To submit results, refer to the corresponding tab. Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics, tasks and models. """ ) deprecated_datasets_columns_1 = list(deprecated_data_benchmark_1.columns[3:]) with gr.Row(): deprecated_metric_dropdown_1 = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric") deprecated_research_textbox_1 = gr.Textbox( placeholder="🔍 Search Models... [press enter]", label="Filter Models by Name", ) deprecated_column_checkboxes_1 = gr.CheckboxGroup( choices=deprecated_datasets_columns_1, value=deprecated_datasets_columns_1, label="Select Columns to Display" ) with gr.Row(): deprecated_datatype_1 = ["number", "markdown"] + ["number"] * (deprecated_num_datasets_1 + 1) deprecated_dataframe_1 = gr.Dataframe(deprecated_data_benchmark_1, datatype=deprecated_datatype_1, type="pandas") def deprecated_update_data_1(metric, search_term, selected_columns): deprecated_model_handler.get_vidore_data(metric) data = deprecated_model_handler.render_df(metric, benchmark_version=1) data = add_rank_and_format(data, benchmark_version=1, selected_columns=selected_columns) data = filter_models(data, search_term) # data = remove_duplicates(data) # Add this line if selected_columns: data = data[["Rank", "Model", "Average"] + selected_columns] return data with gr.Row(): deprecated_refresh_button_1 = gr.Button("Refresh") deprecated_refresh_button_1.click( deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=1), inputs=[deprecated_metric_dropdown_1], outputs=deprecated_dataframe_1, concurrency_limit=20, ) # Automatically refresh the dataframe when the dropdown value changes deprecated_metric_dropdown_1.change( deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=1), inputs=[deprecated_metric_dropdown_1], outputs=deprecated_dataframe_1, ) deprecated_research_textbox_1.submit( lambda metric, search_term, selected_columns: deprecated_update_data_1(metric, search_term, selected_columns), inputs=[deprecated_metric_dropdown_1, deprecated_research_textbox_1, deprecated_column_checkboxes_1], outputs=deprecated_dataframe_1, ) deprecated_column_checkboxes_1.change( lambda metric, search_term, selected_columns: deprecated_update_data_1(metric, search_term, selected_columns), inputs=[deprecated_metric_dropdown_1, deprecated_research_textbox_1, deprecated_column_checkboxes_1], outputs=deprecated_dataframe_1, ) gr.Markdown( f""" - **Total Datasets**: {deprecated_num_datasets_1} - **Total Scores**: {deprecated_num_scores_1} - **Total Models**: {deprecated_num_models_1} """ + r""" Please consider citing: ```bibtex @misc{faysse2024colpaliefficientdocumentretrieval, title={ColPali: Efficient Document Retrieval with Vision Language Models}, author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo}, year={2024}, eprint={2407.01449}, archivePrefix={arXiv}, primaryClass={cs.IR}, url={https://arxiv.org/abs/2407.01449}, } ``` """ ) with gr.TabItem("Deprecated ViDoRe V2"): gr.Markdown( "## Deprecation notice: This leaderboard contains the results computed with the " "[vidore-benchmark](https://github.com/illuin-tech/vidore-benchmark) package, " "which is no longer maintained. Results should be computed using the " "[mteb](https://github.com/embeddings-benchmark/mteb) package as described " "[here](https://github.com/illuin-tech/vidore-benchmark/blob/main/README.md)." ) gr.Markdown("## Missing results in the new leaderboard are being added as they are re-computed.") gr.Markdown("# [Deprecated] ViDoRe V2: A new visual Document Retrieval Benchmark 📚🔍") gr.Markdown("### A harder dataset benchmark for visual document retrieval 👀") gr.Markdown( """ Visual Document Retrieval Benchmark 2 leaderboard. To submit results, refer to the corresponding tab. Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics and models. """ ) deprecated_datasets_columns_2 = list(deprecated_data_benchmark_2.columns[3:]) with gr.Row(): deprecated_metric_dropdown_2 = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric") deprecated_research_textbox_2 = gr.Textbox( placeholder="🔍 Search Models... [press enter]", label="Filter Models by Name", ) deprecated_column_checkboxes_2 = gr.CheckboxGroup( choices=deprecated_datasets_columns_2, value=deprecated_datasets_columns_2, label="Select Columns to Display" ) with gr.Row(): deprecated_datatype_2 = ["number", "markdown"] + ["number"] * (deprecated_num_datasets_2 + 1) deprecated_dataframe_2 = gr.Dataframe(deprecated_data_benchmark_2, datatype=deprecated_datatype_2, type="pandas") def deprecated_update_data_2(metric, search_term, selected_columns): deprecated_model_handler.get_vidore_data(metric) data = deprecated_model_handler.render_df(metric, benchmark_version=2) data = add_rank_and_format(data, benchmark_version=2, selected_columns=selected_columns) data = filter_models(data, search_term) # data = remove_duplicates(data) # Add this line if selected_columns: data = data[["Rank", "Model", "Average"] + selected_columns] return data with gr.Row(): deprecated_refresh_button_2 = gr.Button("Refresh") deprecated_refresh_button_2.click( deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=2), inputs=[deprecated_metric_dropdown_2], outputs=deprecated_dataframe_2, concurrency_limit=20, ) with gr.Row(): gr.Markdown( """ **Note**: For now, all models were evaluated using the vidore-benchmark package and custom retrievers on our side. Those numbers are not numbers obtained from the organisations that released those models. """ ) # Automatically refresh the dataframe when the dropdown value changes deprecated_metric_dropdown_2.change( deprecated_get_refresh_function(deprecated_model_handler, benchmark_version=2), inputs=[deprecated_metric_dropdown_2], outputs=deprecated_dataframe_2, ) deprecated_research_textbox_2.submit( lambda metric, search_term, selected_columns: deprecated_update_data_2(metric, search_term, selected_columns), inputs=[deprecated_metric_dropdown_2, deprecated_research_textbox_2, deprecated_column_checkboxes_2], outputs=deprecated_dataframe_2, ) deprecated_column_checkboxes_2.change( lambda metric, search_term, selected_columns: deprecated_update_data_2(metric, search_term, selected_columns), inputs=[deprecated_metric_dropdown_2, deprecated_research_textbox_2, deprecated_column_checkboxes_2], outputs=deprecated_dataframe_2, ) gr.Markdown( f""" - **Total Datasets**: {deprecated_num_datasets_2} - **Total Scores**: {deprecated_num_scores_2} - **Total Models**: {deprecated_num_models_2} """ + r""" Please consider citing: ```bibtex @misc{faysse2024colpaliefficientdocumentretrieval, title={ColPali: Efficient Document Retrieval with Vision Language Models}, author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo}, year={2024}, eprint={2407.01449}, archivePrefix={arXiv}, primaryClass={cs.IR}, url={https://arxiv.org/abs/2407.01449}, } @misc{macé2025vidorebenchmarkv2raising, title={ViDoRe Benchmark V2: Raising the Bar for Visual Retrieval}, author={Quentin Macé and António Loison and Manuel Faysse}, year={2025}, eprint={2505.17166}, archivePrefix={arXiv}, primaryClass={cs.IR}, url={https://arxiv.org/abs/2505.17166}, } ``` """ ) block.queue(max_size=10).launch(debug=True) if __name__ == "__main__": main()