File size: 12,181 Bytes
bd5481a
 
 
 
 
 
 
 
 
 
5372186
bd5481a
 
 
 
 
 
 
5372186
 
 
85735f4
 
bd5481a
 
 
 
 
 
 
a151084
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03e299f
a151084
 
bd5481a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
import gradio as gr
import pandas as pd
from huggingface_hub import HfApi


# Names of the evaluation datasets shown on the leaderboard. Only eval results
# whose `dataset_name` appears here are kept by `get_model_info`.
DATASETS = [
    "mMARCO-fr", 
    "BSARD",
]
# Hugging Face model IDs loaded with the "DSVBE" type tag
# (dense single-vector bi-encoders).
DENSE_SINGLE_BIENCODERS = [
    "antoinelouis/biencoder-camemberta-base-mmarcoFR",
    "antoinelouis/biencoder-camembert-base-mmarcoFR",
    "antoinelouis/biencoder-distilcamembert-mmarcoFR",
    "antoinelouis/biencoder-camembert-L10-mmarcoFR",
    "antoinelouis/biencoder-camembert-L8-mmarcoFR",
    "antoinelouis/biencoder-camembert-L6-mmarcoFR",
    "antoinelouis/biencoder-camembert-L4-mmarcoFR",
    "antoinelouis/biencoder-camembert-L2-mmarcoFR",
    "antoinelouis/biencoder-electra-base-mmarcoFR",
    "antoinelouis/biencoder-mMiniLMv2-L12-mmarcoFR",
    "antoinelouis/biencoder-mMiniLMv2-L6-mmarcoFR",
    "OrdalieTech/Solon-embeddings-large-0.1",
    "OrdalieTech/Solon-embeddings-base-0.1",
]
# Hugging Face model IDs loaded with the "DMVBE" type tag
# (dense multi-vector bi-encoders).
DENSE_MULTI_BIENCODERS = [
    "antoinelouis/colbertv1-camembert-base-mmarcoFR",
    "antoinelouis/colbertv2-camembert-L4-mmarcoFR",
    "antoinelouis/colbert-xm",
]
# Models loaded with the "SSVBE" type tag (sparse single-vector bi-encoders);
# currently empty.
SPARSE_SINGLE_BIENCODERS = []
# Hugging Face model IDs loaded with the "CE" type tag (cross-encoders).
CROSS_ENCODERS = [
    "antoinelouis/crossencoder-camemberta-L2-mmarcoFR",
    "antoinelouis/crossencoder-camemberta-L4-mmarcoFR",
    "antoinelouis/crossencoder-camemberta-L6-mmarcoFR",
    "antoinelouis/crossencoder-camemberta-L8-mmarcoFR",
    "antoinelouis/crossencoder-camemberta-L10-mmarcoFR",
    "antoinelouis/crossencoder-camemberta-base-mmarcoFR",
    "antoinelouis/crossencoder-camembert-L2-mmarcoFR",
    "antoinelouis/crossencoder-camembert-L4-mmarcoFR",
    "antoinelouis/crossencoder-camembert-L6-mmarcoFR",
    "antoinelouis/crossencoder-camembert-L8-mmarcoFR",
    "antoinelouis/crossencoder-camembert-L10-mmarcoFR",
    "antoinelouis/crossencoder-camembert-base-mmarcoFR",
    "antoinelouis/crossencoder-camembert-large-mmarcoFR",
    "antoinelouis/crossencoder-distilcamembert-mmarcoFR",
    "antoinelouis/crossencoder-electra-base-mmarcoFR",
    "antoinelouis/crossencoder-me5-base-mmarcoFR",
    "antoinelouis/crossencoder-me5-small-mmarcoFR",
    "antoinelouis/crossencoder-t5-base-mmarcoFR",
    "antoinelouis/crossencoder-t5-small-mmarcoFR",
    "antoinelouis/crossencoder-mt5-base-mmarcoFR",
    "antoinelouis/crossencoder-mt5-small-mmarcoFR",
    "antoinelouis/crossencoder-xlm-roberta-base-mmarcoFR",
    "antoinelouis/crossencoder-mdebertav3-base-mmarcoFR",
    "antoinelouis/crossencoder-mMiniLM-L6-v2-mmarcoFR",
]
# Models loaded with the "LLM" type tag; currently empty.
LLMS = []
# Leaderboard columns, in display order, mapped to the datatype string handed
# to `gr.Dataframe`. The keys also serve as the template for each result row,
# and an eval result is stored only when its metric name matches one of them.
COLUMNS = {
    "Model": "html",
    "#Params (M)": "number",
    "Type": "str",
    "Dataset": "str",
    "Recall@1000": "number",
    "Recall@500": "number",
    "Recall@100": "number",
    "Recall@10": "number",
    "MRR@10": "number",
    "nDCG@10": "number",
    "MAP@10": "number",
}


def get_model_info(model_id: str, model_type: str) -> pd.DataFrame:
    """Build the leaderboard rows of one model from its Hugging Face model card.

    The model's metadata is fetched from the Hub and one row is produced per
    dataset listed in ``DATASETS`` for which the card reports at least one
    evaluation result. Each row has a cell for every key of ``COLUMNS``;
    metrics the card does not report stay ``None``.

    Args:
        model_id: Hub repository ID of the model (e.g. ``"org/name"``).
        model_type: Short type tag stored in the ``"Type"`` column.

    Returns:
        A DataFrame with one row per matching dataset. It is empty (no rows,
        no columns) when the model has no card metadata or no evaluation
        result for any dataset in ``DATASETS``.
    """
    model_info = HfApi().model_info(model_id)

    # A repository without card metadata has `card_data` set to None, and a
    # card without an evaluation section has `eval_results` set to None; treat
    # both as "no results" instead of crashing.
    eval_results = getattr(model_info.card_data, "eval_results", None) or []

    # These values are identical for every row of the model, so compute them once.
    model_link = f'<a href="https://huggingface.co/{model_id}" target="_blank" style="color: blue; text-decoration: none;">{model_id}</a>'
    num_params = round(model_info.safetensors.total/1e6) if model_info.safetensors else None
    metadata_columns = ("Model", "#Params (M)", "Type", "Dataset")

    data = {}
    for result in eval_results:
        dataset_name = result.dataset_name
        if dataset_name not in DATASETS:
            continue

        if dataset_name not in data:
            row = dict.fromkeys(COLUMNS)
            row["Model"] = model_link
            row["#Params (M)"] = num_params
            row["Type"] = model_type
            row["Dataset"] = dataset_name
            data[dataset_name] = row

        # Keep only the metrics the leaderboard displays, and never let a metric
        # name overwrite one of the descriptive (non-metric) cells.
        metric_name = result.metric_name
        if metric_name in data[dataset_name] and metric_name not in metadata_columns:
            data[dataset_name][metric_name] = result.metric_value

    return pd.DataFrame(list(data.values()))

def load_all_results() -> pd.DataFrame:
    """Collect the leaderboard rows of every registered model.

    Each model list is paired with the type tag stored in the ``"Type"``
    column, and ``get_model_info`` is called once per model.

    Returns:
        A single DataFrame holding the rows of all models, with a fresh
        0..n-1 index. When no model yields any row, an empty DataFrame that
        still carries the ``COLUMNS`` columns is returned so that callers can
        keep selecting columns by name.
    """
    model_groups = (
        ("DSVBE", DENSE_SINGLE_BIENCODERS),
        ("DMVBE", DENSE_MULTI_BIENCODERS),
        ("SSVBE", SPARSE_SINGLE_BIENCODERS),
        ("CE", CROSS_ENCODERS),
        ("LLM", LLMS),
    )

    frames = []
    for model_type, model_ids in model_groups:
        for model_id in model_ids:
            frame = get_model_info(model_id, model_type=model_type)
            # Skip models without results: concatenating empty frames is
            # deprecated in pandas and contributes nothing to the table.
            if not frame.empty:
                frames.append(frame)

    if not frames:
        return pd.DataFrame(columns=list(COLUMNS))

    # One concat at the end avoids re-copying the accumulated table for every
    # model; `ignore_index` replaces the repeated per-model labels with a
    # unique index.
    return pd.concat(frames, ignore_index=True)

def filter_dataf_by_dataset(dataf: pd.DataFrame, dataset_name: str, sort_by: str) -> pd.DataFrame:
    """Return the rows of one dataset, best score first.

    Args:
        dataf: Table containing a ``"Dataset"`` column.
        dataset_name: Value of ``"Dataset"`` to keep.
        sort_by: Column used to order the result in descending order.

    Returns:
        A new DataFrame with the matching rows, without the ``"Dataset"``
        column, sorted by ``sort_by`` from highest to lowest.
    """
    is_requested_dataset = dataf["Dataset"] == dataset_name
    dataset_rows = dataf.loc[is_requested_dataset]
    without_dataset_column = dataset_rows.drop(columns=["Dataset"])
    return without_dataset_column.sort_values(by=sort_by, ascending=False)


# Checkbox label -> value stored in the "Type" column.
_TYPE_CODES = {
    'Dense single-vector bi-encoder (DSVBE)': 'DSVBE',
    'Dense multi-vector bi-encoder (DMVBE)': 'DMVBE',
    'Sparse single-vector bi-encoder (SSVBE)': 'SSVBE',
    'Cross-encoder (CE)': 'CE',
    'LLM': 'LLM',
}
# Checkbox label -> predicate over the "#Params (M)" column (in millions).
# Both ends of the two middle ranges are inclusive, so a model with exactly
# 300M parameters belongs to both "Base" and "Large".
_SIZE_FILTERS = {
    'Small (< 100M)': lambda params: params < 100,
    'Base (100M-300M)': lambda params: (params >= 100) & (params <= 300),
    'Large (300M-500M)': lambda params: (params >= 300) & (params <= 500),
    'Extra-large (500M+)': lambda params: params > 500,
}


def update_table(dataf: pd.DataFrame, query: str, selected_types: list, selected_sizes: list) -> pd.DataFrame:
    """Filter a leaderboard table by model type, model size and search text.

    The three criteria are combined with AND: a row must satisfy every
    criterion that is active. Within the type filter (and within the size
    filter) the selected options are combined with OR. A criterion with
    nothing selected, or an empty query, does not restrict the result.

    Args:
        dataf: Leaderboard table with ``"Model"``, ``"Type"`` and
            ``"#Params (M)"`` columns. It is not modified.
        query: Text to look for in the model name, matched literally and
            case-insensitively.
        selected_types: Labels ticked in the model-type checkbox group.
        selected_sizes: Labels ticked in the model-size checkbox group.

    Returns:
        A new DataFrame containing only the rows that pass all active filters.
    """
    filtered_df = dataf.copy()

    # Boolean masks are applied as plain arrays so that filtering stays
    # positional and works even when the table has repeated index labels.
    type_codes = [_TYPE_CODES[label] for label in (selected_types or []) if label in _TYPE_CODES]
    if type_codes:
        filtered_df = filtered_df.loc[filtered_df['Type'].isin(type_codes).to_numpy()]

    size_tests = [_SIZE_FILTERS[label] for label in (selected_sizes or []) if label in _SIZE_FILTERS]
    if size_tests:
        # Unknown parameter counts (None) become NaN, which fails every range
        # test instead of raising on the comparison.
        params = pd.to_numeric(filtered_df['#Params (M)'], errors='coerce')
        size_mask = size_tests[0](params).to_numpy()
        for size_test in size_tests[1:]:
            size_mask = size_mask | size_test(params).to_numpy()
        filtered_df = filtered_df.loc[size_mask]

    query = (query or "").strip()
    if query:
        # The "Model" cells hold an HTML link; search the visible text only so
        # that words from the markup (e.g. "href") do not match every row.
        model_names = filtered_df['Model'].astype(str).str.replace(r'<[^>]*>', '', regex=True)
        # `regex=False`: user input is plain text, so characters such as "("
        # must not be parsed as a pattern.
        name_matches = model_names.str.contains(query, case=False, regex=False, na=False)
        filtered_df = filtered_df.loc[name_matches.to_numpy()]

    return filtered_df


# Build the leaderboard UI: header, search/filter widgets, one tab per dataset
# and the citation block. The results are loaded once, while the UI is built.
with gr.Blocks() as demo:
    # NOTE(review): the mailto address below looks like an obfuscation
    # placeholder rather than a real address -- confirm and restore it.
    gr.HTML("""
        <div style="display: flex; flex-direction: column; align-items: center;">
            <div style="align-self: flex-start;">
                <a href="mailto:[email protected]" target="_blank" style="color: blue; text-decoration: none;">Contact/Submissions</a>
            </div>
            <h1 style="margin: 0;">🥇 DécouvrIR</h1>
            <span>A Benchmark for Evaluating the Robustness of Information Retrieval Models in French</span>
        </div>
    """)

    # Create the Pandas dataframes (one per dataset)
    all_df = load_all_results()
    mmarco_df = filter_dataf_by_dataset(all_df, dataset_name="mMARCO-fr", sort_by="Recall@500")
    bsard_df = filter_dataf_by_dataset(all_df, dataset_name="BSARD", sort_by="Recall@500")
    
    # Search and filter widgets
    with gr.Column():
        with gr.Row():
            search_bar = gr.Textbox(placeholder=" 🔍 Search for a model...", show_label=False, elem_id="search-bar")
        
        with gr.Row():
            filter_type = gr.CheckboxGroup(
                label="Model type",
                choices=[
                    'Dense single-vector bi-encoder (DSVBE)', 
                    'Dense multi-vector bi-encoder (DMVBE)', 
                    'Sparse single-vector bi-encoder (SSVBE)', 
                    'Cross-encoder (CE)', 
                    'LLM',
                ],
                value=[],
                interactive=True,
                elem_id="filter-type",
            )

        with gr.Row():
            filter_size = gr.CheckboxGroup(
                label="Model size",
                choices=['Small (< 100M)', 'Base (100M-300M)', 'Large (300M-500M)', 'Extra-large (500M+)'],
                value=[],
                interactive=True,
                elem_id="filter-size",
            )

    # Leaderboard tables
    with gr.Tabs():
        with gr.TabItem("🌐 mMARCO-fr"):
            gr.HTML("""
                <p>The <a href="https://huggingface.co/datasets/unicamp-dl/mmarco" target="_blank" style="color: blue; text-decoration: none;">mMARCO</a> dataset is a machine-translated version of 
                the widely popular MS MARCO dataset across 13 languages (including French) for studying <strong> domain-general</strong> passage retrieval.</p>
                <p>The evaluation is performed on <strong>6,980 dev questions</strong> labeled with relevant passages to be retrieved from a corpus of <strong>8,841,823 candidates</strong>.</p>
            """)
            mmarco_table = gr.Dataframe(
                value=mmarco_df,
                datatype=[COLUMNS[col] for col in mmarco_df.columns],
                interactive=False,
                elem_classes="text-sm",
            )

        with gr.TabItem("⚖️ BSARD"):
            gr.HTML("""
                <p>The <a href="https://huggingface.co/datasets/maastrichtlawtech/bsard" target="_blank" style="color: blue; text-decoration: none;">Belgian Statutory Article Retrieval Dataset (BSARD)</a> is a 
                French native dataset for studying <strong>legal</strong> document retrieval.</p>
                <p>The evaluation is performed on <strong>222 test questions</strong> labeled by experienced jurists with relevant Belgian law articles to be retrieved from a corpus of <strong>22,633 candidates</strong>.</p>
                <i>[Coming soon...]</i>
            """)
            # bsard_table = gr.Dataframe(
            #     value=bsard_df,
            #     datatype=[COLUMNS[col] for col in bsard_df.columns],
            #     interactive=False,
            #     elem_classes="text-sm",
            # )

    # Refresh the table whenever the search text or one of the filters changes.
    # All three widgets are passed as `inputs` so the callback receives their
    # *current* values; reading `component.value` inside the callback would
    # only return the value each widget was created with.
    filter_widgets = [search_bar, filter_type, filter_size]

    def refresh_mmarco_table(query, selected_types, selected_sizes):
        """Re-filter the mMARCO-fr table from the current widget values."""
        return update_table(
            dataf=mmarco_df,
            query=query,
            selected_types=selected_types,
            selected_sizes=selected_sizes,
        )

    for widget in filter_widgets:
        widget.change(
            fn=refresh_mmarco_table,
            inputs=filter_widgets,
            outputs=mmarco_table,
        )
    # NOTE: once the BSARD table above is enabled, register the same listeners
    # for it with a callback that filters `bsard_df` and outputs to `bsard_table`.
    
    # Citation
    with gr.Column():
        with gr.Row():
            gr.HTML("""
                <h2>Citation</h2>
                <p>For attribution in academic contexts, please cite this benchmark and any of the models released by <a href="https://huggingface.co/antoinelouis" target="_blank" style="color: blue; text-decoration: none;">@antoinelouis</a> as follows:</p>
            """)
        with gr.Row():
            # BibTeX field values must be delimited by braces (or double
            # quotes); single quotes are not accepted by BibTeX.
            citation_block = (
                "@online{louis2024decouvrir,\n"
                "\tauthor    = {Antoine Louis},\n"
                "\ttitle     = {DécouvrIR: A Benchmark for Evaluating the Robustness of Information Retrieval Models in French},\n"
                "\tpublisher = {Hugging Face},\n"
                "\tmonth     = {mar},\n"
                "\tyear      = {2024},\n"
                "\turl       = {https://huggingface.co/spaces/antoinelouis/decouvrir},\n"
                "}\n"
            )
            gr.Code(citation_block, language=None, show_label=False)

demo.launch()