import gradio as gr
import pandas as pd

LEADERBOARD_PATH = "leaderboard.csv"


def load_leaderboard():
    """Load the leaderboard CSV, falling back to a placeholder row if the file is missing."""
    try:
        return pd.read_csv(LEADERBOARD_PATH)
    except FileNotFoundError:
        return pd.DataFrame([{"Status": "No leaderboard data available"}])


def style_leaderboard(df: pd.DataFrame):
    """Return styled HTML with highlighted best performers and professional formatting."""
    if df.empty:
        return "<p>No data available.</p>"
    # Metric columns; Rank and Model are identifiers, not scores.
    num_cols = [c for c in df.columns if c not in ["Rank", "Model"]]

    def highlight_best(s):
        # Highlight the best (highest) value in each numeric column.
        if pd.api.types.is_numeric_dtype(s):
            max_val = s.max()
            return ['color: #6AA84F; font-weight: 600;' if v == max_val else '' for v in s]
        return ['' for _ in s]

    df = df.reset_index(drop=True)
    styled = (
        df.style
        .apply(highlight_best, subset=num_cols, axis=0)
        .format(precision=1)
        .hide(axis='index')
    )
    # Render the styled table to HTML; page-level styling is handled by the Blocks CSS below.
    html = styled.to_html()
    return html


def leaderboard_view():
    df = load_leaderboard()
    return style_leaderboard(df)

# ---------------- Gradio UI ---------------- #
with gr.Blocks(css="""
    .gradio-container {
        max-width: 1200px !important;
        margin: auto;
        font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
    }
    #title-image {
        margin: 20px auto;
        display: block;
        max-width: 800px;
    }
    .gr-markdown h1 {
        color: #1e293b;
        font-weight: 700;
        margin-bottom: 16px;
    }
    .gr-markdown h2 {
        color: #334155;
        font-weight: 600;
        margin-top: 24px;
        margin-bottom: 12px;
    }
    .gr-markdown h3 {
        color: #475569;
        font-weight: 600;
        margin-bottom: 8px;
    }
    .gr-markdown p {
        color: #64748b;
        line-height: 1.6;
    }
    .gr-tab-nav button {
        font-weight: 500;
    }
""") as demo:
    # Banner image
    gr.Image("title.png", elem_id="title-image", show_label=False)

    # Professional introduction
    gr.Markdown("""
# DFBench: The Image Deepfake Detection Benchmark 2025

DFBench provides a standardized evaluation for computer-vision deepfake detection systems.
This leaderboard focuses on image deepfake detection, e.g., the output of text-to-image and image-to-image models.

**Objectives:**
- Allow fair comparison between deepfake detection models on unseen test data (no fine-tuning on the test data is possible)
- Advance the state of the art in synthetic media identification
""")

    with gr.Tab("Leaderboard"):
        gr.Markdown("## Image Deepfake Detection Leaderboard")
        gr.HTML(leaderboard_view())
        gr.Markdown("""
*The leaderboard is updated upon validation of new submissions. All results are evaluated on the official [test dataset](https://huggingface.co/datasets/DFBench/DFBench_Image25).*
""")
    with gr.Tab("Submission Guidelines"):
        gr.Markdown("""
# Model Submission Process

**Official Benchmark Test Dataset:** [DFBench/DFBench_Image25](https://huggingface.co/datasets/DFBench/DFBench_Image25)

The test dataset comprises **2,920 unlabeled images**. Each image is either:
- **Real:** an authentic, unmodified image
- **Fake:** AI-generated or synthetically modified content

Since there are no labels, you cannot (and should not) train your model on the test data.
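
The test images can be pulled with the `datasets` library; a minimal sketch, assuming the split is named `test` (check the dataset card for the actual layout):

```
from datasets import load_dataset

# Assumption: the split is named "test"; record fields depend on the dataset schema.
ds = load_dataset("DFBench/DFBench_Image25", split="test")
print(len(ds))  # expected: 2920
print(ds[0])    # inspect the record layout
```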

---

## Submission Requirements

### File Format

Submit predictions as a CSV file with the header `filename,label`:
- `filename`: the exact filename as provided in the dataset
- `label`: the binary classification result (`real` or `fake`)

For example:

```
filename,label
1.jpg,fake
2.jpeg,real
3.webp,fake
...
2920.png,fake
```
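
A minimal sketch of producing a compliant file with pandas (`predict` and the filename list are placeholders for your own inference pipeline):

```
import pandas as pd

def predict(filename):
    # Placeholder: run your detector on the image and return "real" or "fake".
    return "fake"

filenames = ["1.jpg", "2.jpeg", "3.webp"]  # placeholder; use all 2,920 test filenames
rows = [{"filename": f, "label": predict(f)} for f in filenames]
pd.DataFrame(rows).to_csv("Model_This_name.csv", index=False)
```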

### Submission Process

1. Generate predictions for all 2,920 test images
2. Format the results according to the specification above
3. Email your CSV file to **submission@df-bench.com**. The filename determines the model name shown on the leaderboard, e.g. `Model_This_name.csv` appears as `Model This name`.

### Evaluation Timeline

- Submissions are processed within 5-7 business days
- Approved submissions are added to the public leaderboard

## Notes

- Each research group may submit one set of scores per month
- All submissions undergo automated validation before leaderboard inclusion
- The authors reserve the right not to publish, or to remove, a submission at their discretion
- Submissions may be excluded if found to violate ethical guidelines, contain malicious content, or appear fraudulent
- Benchmark maintainers may adjust evaluation protocols as the dataset and task evolve
- No warranties are provided regarding benchmark results, which are intended strictly for research and comparison purposes

For technical inquiries regarding the evaluation process, please contact the benchmark maintainers via the submission email.
""")

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)