add POPE to the name
Browse files
- app.py +1 -1
- src/about.py +4 -4
app.py
CHANGED
@@ -42,7 +42,7 @@ with demo:
|
|
42 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
43 |
|
44 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
45 |
-
show_result_page(root_path='VH', title='ποΈ MHaluBench', index=0)
|
46 |
show_result_page(root_path='AVH-visual', title='πΊ AVHalluBench (Visual)', index=1)
|
47 |
show_result_page(root_path='AVH-audio', title='π AVHalluBench (Audio)', index=2)
|
48 |
show_about_page(index=3)
|
|
|
42 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
43 |
|
44 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
45 |
+
show_result_page(root_path='VH', title='ποΈ POPE/MHaluBench', index=0)
|
46 |
show_result_page(root_path='AVH-visual', title='πΊ AVHalluBench (Visual)', index=1)
|
47 |
show_result_page(root_path='AVH-audio', title='π AVHalluBench (Audio)', index=2)
|
48 |
show_about_page(index=3)
|
src/about.py
CHANGED
@@ -12,10 +12,10 @@ TITLE = """<h1 align="center" id="space-title">π Multimodal Hallucination Lea
|
|
12 |
# <a href="url"></a>
|
13 |
|
14 |
INTRODUCTION_TEXT = """
|
15 |
-
<p>The Multimodal Hallucination Leaderboard ranks multimodal large language models based on hallucination levels in various tasks. System rankings for three different input modalities are displayed, covering the audio, image, and video domains. For each task, hallucination levels are measured using various existing hallucination ranking metrics. The leaderboard currently consists of
|
16 |
<ul>
|
17 |
-
<li><b><a href="https://huggingface.co/datasets/openkg/MHaluBench">MHaluBench</a></b>:
|
18 |
-
<li><b><a href="https://huggingface.co/datasets/potsawee/avhallubench">AVHalluBench</a></b> (Visual and Audio):
|
19 |
</ul>
|
20 |
"""
|
21 |
|
@@ -24,7 +24,7 @@ TODO write about page here
|
|
24 |
"""
|
25 |
|
26 |
EVALUATION_QUEUE_TEXT = """
|
27 |
-
TODO
|
28 |
"""
|
29 |
|
30 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
|
|
12 |
# <a href="url"></a>
|
13 |
|
14 |
INTRODUCTION_TEXT = """
|
15 |
+
<p>The Multimodal Hallucination Leaderboard ranks multimodal large language models based on hallucination levels in various tasks. System rankings for three different input modalities are displayed, covering the audio, image, and video domains. For each task, hallucination levels are measured using various existing hallucination ranking metrics. The leaderboard currently consists of the following benchmarks:</p>
|
16 |
<ul>
|
17 |
+
<li><b><a href="https://arxiv.org/abs/2305.10355">POPE</a> / <a href="https://huggingface.co/datasets/openkg/MHaluBench">MHaluBench</a></b>: Both are image-captioning datasets where the task is to generate text given an input image. System hallucination scores are measured using existing visual hallucination metrics as follows: (i) the <a href="https://arxiv.org/abs/2305.10355">POPE</a> method is evaluated on the POPE dataset, and (ii) <a href="https://arxiv.org/abs/1809.02156">CHAIR</a>, <a href="https://arxiv.org/abs/2402.03190">UniHD</a>, <a href="https://arxiv.org/abs/2303.08896">SelfCheckGPT</a>, <a href="https://arxiv.org/abs/2405.13684">CrossCheckGPT</a>, and human evaluation scores (for systems investigated in the CrossCheckGPT paper) are evaluated on the image-captioning subset of the MHaluBench dataset.</li>
|
18 |
+
<li><b><a href="https://huggingface.co/datasets/potsawee/avhallubench">AVHalluBench</a></b> (Visual and Audio): This is a video-captioning dataset where the task is to generate text descriptions given an input video. This dataset can be used for two different tasks: generating either <b>visual descriptions</b> or <b>audio descriptions</b>. Existing audio-visual hallucination metrics include <a href="https://arxiv.org/abs/2303.08896">SelfCheckGPT</a>, <a href="https://arxiv.org/abs/2405.13684">CrossCheckGPT</a>, and <a href="https://arxiv.org/abs/2405.13684">RefCheck</a>.</li>
|
19 |
</ul>
|
20 |
"""
|
21 |
|
|
|
24 |
"""
|
25 |
|
26 |
EVALUATION_QUEUE_TEXT = """
|
27 |
+
TODO write this
|
28 |
"""
|
29 |
|
30 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|