import gradio as gr
import pandas as pd
# Note: BackgroundScheduler is imported but never used below; it is likely a
# leftover from the standard Hugging Face leaderboard template, where it
# drives periodic Space restarts.
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
# Deepfake detector data: average performance across all evaluation datasets
data_avg_performance = {
    "Detector": [
        "[NPR](https://arxiv.org/abs/2312.10461)",
        "[UCF](http://export.arxiv.org/abs/2304.13949)",
        "[CAMO](https://bitmindlabs.notion.site/CAMO-Content-Aware-Model-Orchestration-CAMO-Framework-for-Deepfake-Detection-43ef46a0f9de403abec7a577a45cd075?pvs=74)",
    ],
    "Accuracy": [0.7169, 0.7229, 0.7555],
    "Precision": [0.9193, 0.9436, 0.9442],
    "Recall": [0.5996, 0.592, 0.647],
    "F1-Score": [0.7258, 0.7275, 0.7679],
    "MCC": [0.5044, 0.5285, 0.5707],
}
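
# MCC (Matthews correlation coefficient) condenses the full confusion matrix
# into a single score in [-1, 1], which is why it is reported alongside
# accuracy above. A minimal illustrative sketch of how such a value is derived
# from raw counts (not used by the app; the helper name is an assumption, not
# part of the benchmark):
def mcc_from_counts(tp, tn, fp, fn):
    """Matthews correlation coefficient from raw confusion-matrix counts."""
    import math
    denom = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    return (tp * tn - fp * fn) / denom if denom else 0.0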
# Per-dataset accuracy for the same detectors
data_dataset_accuracy = {
    "Detector": [
        "[NPR](https://arxiv.org/abs/2312.10461)",
        "[UCF](http://export.arxiv.org/abs/2304.13949)",
        "[CAMO](https://bitmindlabs.notion.site/CAMO-Content-Aware-Model-Orchestration-CAMO-Framework-for-Deepfake-Detection-43ef46a0f9de403abec7a577a45cd075?pvs=74)",
    ],
    "CelebA-HQ": [0.987, 0.995, 0.999],
    "Flickr30k": [0.916, 0.981, 0.979],
    "ImageNet": [0.834, 0.847, 0.831],
    "DiffusionDB": [0.876, 0.85, 0.961],
    "CelebA-HQ-SDXL": [0.386, 0.484, 0.682],
    "CelebA-HQ-Flux": [0.846, 0.794, 0.722],
    "Flickr30k-SDXL": [0.302, 0.256, 0.28],
    "MS-COCO-Flux": [0.588, 0.576, 0.59],
}
# Convert data to DataFrames
df_avg_performance = pd.DataFrame(data_avg_performance)
df_dataset_accuracy = pd.DataFrame(data_dataset_accuracy)
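
# Optional sanity check (an assumption about intent, not in the original app):
# both hand-entered tables should list the same detectors in the same order,
# otherwise the per-dataset rows would be attributed to the wrong model.
assert list(df_avg_performance["Detector"]) == list(df_dataset_accuracy["Detector"]), \
    "Detector rows must align across the two tables"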
def init_leaderboard():
    if df_avg_performance.empty:
        raise ValueError("Leaderboard DataFrame is empty.")
    # Display the average-performance leaderboard only. Using "markdown" for
    # the Detector column renders the paper links as hyperlinks.
    leaderboard = Leaderboard(
        value=df_avg_performance,
        datatype=["markdown", "number", "number", "number", "number", "number"],
        select_columns=SelectColumns(
            default_selection=["Detector", "Accuracy", "Precision", "Recall", "F1-Score", "MCC"],
            label="Select Columns to Display:",
        ),
        search_columns=["Detector"],
        filter_columns=[
            ColumnFilter("Detector", type="checkboxgroup", label="Detectors"),
        ],
        bool_checkboxgroup_label="Hide detectors",
        interactive=False,
    )
    return leaderboard
# Highlight the maximum value in each column in bold red
def highlight_max(s):
    is_max = s == s.max()
    return ["font-weight: bold; color: red;" if v else "" for v in is_max]
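
# For example, highlight_max(pd.Series([0.71, 0.76, 0.72])) returns
# ['', 'font-weight: bold; color: red;', '']; Styler.apply (axis=0 by
# default) calls it once per column in the `subset` below.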
# Style the dataframes: bold-red maximum per metric column
def style_dataframe(df):
    return df.style.apply(highlight_max, subset=["Accuracy", "Precision", "Recall", "F1-Score", "MCC"])

def style_dataset_accuracy(df):
    return df.style.apply(highlight_max, subset=df.columns[1:])
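
# To actually display the highlighted tables, render the Styler to HTML
# (Styler.to_html is available in pandas >= 1.3) and pass it to gr.HTML, as
# in the commented-out lines further down; a sketch, not wired into the app:
#   gr.HTML(style_dataframe(df_avg_performance).to_html())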
# Gradio demo with the styled dataframes
demo = gr.Blocks()
with demo:
    # Centered title
    gr.HTML("""
    <div style="text-align:center;">
        <h1>Deepfake Detection Arena (DFD) Leaderboard</h1>
    </div>
    """)
    # Description / intro section
    gr.Markdown("""
    ## 🎯 The Open Benchmark for Detecting AI-Generated Images

    [DFD-Arena](https://github.com/BitMind-AI/dfd-arena) is the first benchmark to address the open-source computer vision community's need for a *comprehensive evaluation framework* for state-of-the-art (SOTA) detection of AI-generated images.

    While [previous studies](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9721302) have benchmarked the SOTA on content-specific subsets of the deepfake detection problem, e.g., human-face deepfakes via [DeepfakeBench](https://github.com/SCLBD/DeepfakeBench), these benchmarks do not adequately cover the broad spectrum of real and generated image types seen in everyday scenarios.

    ### 🔍 Explore DFD-Arena

    Learn how the framework evaluates detectors on diverse, content-rich images with semantic balance between real and generated data:
    - 📂 [Code: DFD-Arena GitHub Repository](https://github.com/BitMind-AI/dfd-arena)
    - 📝 [Blog: Technical Write-Up](https://bitmindlabs.notion.site/BitMind-Deepfake-Detection-Arena-106af85402838007830ece5a6f3f35a8?pvs=25)

    ### ✍️ Authorship

    Both DFD-Arena and the novel synthetic image datasets used for evaluation were created by [BitMind](https://www.bitmind.ca/).
    - 🐦 [X/Twitter: @BitMindAI](https://x.com/BitMindAI)
    """)
    with gr.Tabs():
        with gr.TabItem("🏆 Deepfake Detection Arena", elem_id="dfd-arena-leaderboard-tab"):
            # Average performance metrics (interactive leaderboard)
            gr.Markdown("## Average Performance Metrics")
            leaderboard = init_leaderboard()

            # Alternative: average metrics as an HTML-styled table
            # styled_avg_performance = style_dataframe(df_avg_performance)
            # gr.HTML(styled_avg_performance.to_html(), label="Average Performance Metrics")

            # Dataset-specific accuracy; "markdown" renders the Detector links
            gr.Markdown("## Dataset-specific Accuracy")
            gr.DataFrame(
                value=df_dataset_accuracy,
                datatype=["markdown"] + ["number"] * (len(df_dataset_accuracy.columns) - 1),
            )
            # Alternative: HTML-styled table with highlighted max values
            # styled_dataset_accuracy = style_dataset_accuracy(df_dataset_accuracy)
            # gr.HTML(styled_dataset_accuracy.to_html(), label="Dataset-specific Accuracy")

        # with gr.TabItem("📖 About"):
        #     gr.Markdown("This leaderboard evaluates deepfake detection algorithms on various metrics and datasets.")

        # with gr.TabItem("🚀 Submit Detector Results"):
        #     gr.Markdown("Submit your detector results for evaluation.")
        #     # Add submission form elements as needed here

demo.queue(default_concurrency_limit=40).launch()