Upload 8 files
- app.py +40 -0
- evaluation.py +45 -0
- leaderboard.py +72 -0
- leaderboard.tsv +5 -0
- requirements.txt +1 -0
- submission.py +16 -0
- utils.py +13 -0
app.py
ADDED
@@ -0,0 +1,40 @@
import gradio as gr

from leaderboard import render_leader_board, render_info_html, render_citation
from evaluation import render_eval_info
from submission import render_submission_page
from utils import load_leaderboard, custom_css

# Load the leaderboard data once at startup.
leaderboard_df_err = load_leaderboard("./leaderboard.tsv")


def create_ui():
    with gr.Blocks(theme=gr.themes.Soft(text_size=gr.themes.sizes.text_md), css=custom_css) as demo:
        # gr.Markdown("# Speech Deep Fake Arena")
        gr.Image('./data/data/df_arena_3.jpg', interactive=False,
                 show_fullscreen_button=False, show_share_button=False, show_label=False)

        with gr.Tabs():
            with gr.Tab("🏆 Leaderboard"):
                with gr.Column():
                    render_info_html()
                    gr.Markdown("Rankings for different systems.")
                    render_leader_board(leaderboard_df_err)  # styled gr.Dataframe
                    render_citation()

            with gr.Tab("📊 Metrics"):
                render_eval_info()

            with gr.Tab("📤 Submit your own system!"):
                render_submission_page()

    return demo


# Launch the app
create_ui().launch()
evaluation.py
ADDED
@@ -0,0 +1,45 @@
import gradio as gr


def render_eval_info():
    text = r"""
The Iqra’Eval challenge provides a shared, transparent platform to benchmark phoneme-prediction systems on our open testset (“IqraEval/open_testset”).

**Submission Details**

– Submit a UTF-8 CSV named **teamID_submission.csv** with exactly two columns:
  1. **id**: utterance identifier (e.g. “0000_0001”)
  2. **predicted_sequence**: your predicted phoneme sequence (space-separated)

```csv
id,predicted_sequence
0000_0001,i n n a m a a y a …
0000_0002,m a a n a n s a …
…
```

**Evaluation Criteria**

– Leaderboard ranking is based on phoneme-level **F1-score**, computed via a two-stage (detection + diagnostic) hierarchy:

1. **Detection (error vs. correct)**
   - **TR (True Rejects)**: mispronounced phonemes correctly flagged
   - **FA (False Accepts)**: mispronunciations missed
   - **FR (False Rejects)**: correct phonemes wrongly flagged
   - **TA (True Accepts)**: correct phonemes correctly passed

   **Metrics:**

   - **Precision** = `TR / (TR + FR)`
   - **Recall** = `TR / (TR + FA)`
   - **F1** = `2 · Precision · Recall / (Precision + Recall)`

2. **Diagnostic (substitution/deletion/insertion errors)**
   See the **Metrics** tab for the breakdown into:
   - **DER**: Deletion Error Rate
   - **IER**: Insertion Error Rate
   - **SER**: Substitution Error Rate

– Once we receive your file (email: **[email protected]**), your submission is auto-evaluated and placed on the leaderboard.
"""
    return gr.Markdown(text, latex_delimiters=[{"left": "$", "right": "$", "display": True}])
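For concreteness, here is a minimal sketch of the detection-stage arithmetic described above. The position-by-position alignment and the canonical/reference/predicted naming are assumptions made for illustration; this is not the challenge's official scorer.

```python
# A hedged sketch of the detection-stage scoring above, assuming the three
# phoneme sequences are already aligned position-by-position (the official
# scorer's alignment step is not shown). `canonical` is what should have been
# said, `reference` what was actually said, `predicted` the system output.

def detection_scores(canonical, reference, predicted):
    TR = FA = FR = TA = 0
    for canon, ref, pred in zip(canonical, reference, predicted):
        mispronounced = ref != canon   # ground-truth mispronunciation
        flagged = pred != canon        # system flags a mispronunciation
        if mispronounced and flagged:
            TR += 1  # True Reject: mispronunciation correctly flagged
        elif mispronounced:
            FA += 1  # False Accept: mispronunciation missed
        elif flagged:
            FR += 1  # False Reject: correct phoneme wrongly flagged
        else:
            TA += 1  # True Accept: correct phoneme correctly passed
    precision = TR / (TR + FR) if (TR + FR) else 0.0
    recall = TR / (TR + FA) if (TR + FA) else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return precision, recall, f1
```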
leaderboard.py
ADDED
@@ -0,0 +1,72 @@
import gradio as gr
import numpy as np


def make_clickable(url, name):
    return f'<a href="{url}" target="_blank">{name}</a>'


def render_info_html():
    info_text = """
The Iqra’Eval challenge provides a shared, transparent platform to benchmark sequence-prediction systems on our open testset (“IqraEval/open_testset”).

To participate, please:

1. Use the IqraEval open testset “IqraEval/open_testset”
2. Package your predictions in a CSV file with exactly two columns, **id** and **predicted_sequence**
3. Send your CSV along with your **team_name** to **[email protected]**

Once we receive your file, your system will be automatically evaluated and placed on the public leaderboard. Check out the **Metrics** tab to see how scores are computed, and the **Leaderboard** tab for current rankings.

If your model doesn’t appear, use the “Submit a request” link under the **Support** tab. Good luck, and we look forward to seeing your results!
"""
    return gr.Markdown(info_text)


def highlight_min(s, props=''):
    # Apply `props` to the minimum value of a column, '' elsewhere.
    return np.where(s == np.nanmin(s.values), props, '')


def render_leader_board(leaderboard_df):
    if leaderboard_df.empty:
        return gr.HTML(value="<p>No data available in the leaderboard.</p>")

    # Higher F1-score is better, so rank systems best-first.
    leaderboard_df = leaderboard_df.sort_values(by="F1-score", ascending=False).reset_index(drop=True)

    # Assign rank emojis 🥇🥈🥉 to the top three systems.
    emojis = ["🥇", "🥈", "🥉"]
    for rank, emoji in enumerate(emojis[:len(leaderboard_df)]):
        leaderboard_df.loc[rank, "System"] = f"{emoji} {leaderboard_df.System[rank]}"

    # Restrict the green min-highlight to numeric columns; applying it to the
    # string columns (System, Team) would make np.nanmin fail.
    numeric_cols = leaderboard_df.select_dtypes("number").columns
    styler = (
        leaderboard_df
        .style
        .format(precision=2)
        .apply(highlight_min, props='color:green', axis=0, subset=numeric_cols)
    )

    # Two markdown columns (System, Team) followed by the numeric score columns.
    datatypes = ['markdown'] * 2 + ['number'] * (len(leaderboard_df.columns) - 2)
    return gr.Dataframe(styler, datatype=datatypes, elem_id="leaderboard-table")


def render_citation():
    return gr.Markdown(r"""
If you use Iqra'Eval in your work, it can be cited as:

```bibtex
@misc{kheir2025unifiedbenchmarkarabicpronunciation,
      title={Towards a Unified Benchmark for Arabic Pronunciation Assessment: Quranic Recitation as Case Study},
      author={Yassine El Kheir and Omnia Ibrahim and Amit Meghanani and Nada Almarwani and Hawau Olamide Toyin and Sadeen Alharbi and Modar Alfadly and Lamya Alkanhal and Ibrahim Selim and Shehab Elbatal and Salima Mdhaffar and Thomas Hain and Yasser Hifny and Mostafa Shahin and Ahmed Ali},
      year={2025},
      eprint={2506.07722},
      archivePrefix={arXiv},
      primaryClass={cs.SD},
      url={https://arxiv.org/abs/2506.07722},
}
```""")
leaderboard.tsv
ADDED
@@ -0,0 +1,5 @@
System,Team,F1-score,Precision,Recall,Detection ERROR
HuBERT,Baseline,40.11,32.12,13.16,50
mHuBERT,Baseline,50.11,32.12,13.16,50
Wav2Vec2.0,Baseline,30.11,32.12,13.16,50
WavLM,Baseline,20.11,32.12,13.16,50
requirements.txt
ADDED
@@ -0,0 +1 @@
gradio
submission.py
ADDED
@@ -0,0 +1,16 @@
import gradio as gr


def render_submission_page():
    text = r"""
**Submit Your System**

Want to see your model on the Iqra’Eval leaderboard? Send your **team_name** and a CSV (with columns `id`,`predicted_sequence`) for the `IqraEval/open_testset` to **[email protected]**. We’ll handle the evaluation and add your entry automatically.

- **Open Testset**: `IqraEval/open_testset`
"""
    return gr.Markdown(text)
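As a convenience, here is a hypothetical pre-flight check a team might run locally before emailing a file; the function name and the specific checks are assumptions sketching the format described above, not part of this Space.

```python
# Hypothetical local sanity check for a teamID_submission.csv file;
# not part of the Space, just a sketch of the documented format.
import pandas as pd

def check_submission(path):
    df = pd.read_csv(path, encoding="utf-8")
    expected = ["id", "predicted_sequence"]
    if list(df.columns) != expected:
        raise ValueError(f"columns must be exactly {expected}, got {list(df.columns)}")
    if df["id"].isna().any():
        raise ValueError("every row needs an utterance id, e.g. 0000_0001")
    if df["predicted_sequence"].fillna("").str.strip().eq("").any():
        raise ValueError("every row needs a space-separated phoneme sequence")
    print(f"{path}: {len(df)} rows look well-formed")

# check_submission("teamID_submission.csv")
```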
utils.py
ADDED
@@ -0,0 +1,13 @@
import pandas as pd


def load_leaderboard(db_path):
    # The leaderboard file is comma-separated despite its .tsv extension,
    # so pd.read_csv's default separator parses it correctly.
    df = pd.read_csv(db_path)
    return df


custom_css = """
#leaderboard-table th .header-content {
    white-space: nowrap;
}
"""
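A brief usage sketch (assumed, not in the upload): load_leaderboard works on leaderboard.tsv only because that file is comma-separated despite its extension, so pd.read_csv's default separator applies.

```python
# Assumed usage, not part of the upload.
from utils import load_leaderboard

df = load_leaderboard("./leaderboard.tsv")
print(df.columns.tolist())
# ['System', 'Team', 'F1-score', 'Precision', 'Recall', 'Detection ERROR']
print(df.sort_values("F1-score", ascending=False).iloc[0]["System"])  # best baseline first
```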