import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns

# Deepfake detector data: average performance across all evaluation datasets
data_avg_performance = {
    "Detector": [
        "[NPR](https://arxiv.org/abs/2312.10461)",
        "[UCF](http://export.arxiv.org/abs/2304.13949)",
        "[CAMO](https://bitmindlabs.notion.site/CAMO-Content-Aware-Model-Orchestration-CAMO-Framework-for-Deepfake-Detection-43ef46a0f9de403abec7a577a45cd075?pvs=74)",
    ],
    "Accuracy": [0.7169, 0.7229, 0.7555],
    "Precision": [0.9193, 0.9436, 0.9442],
    "Recall": [0.5996, 0.592, 0.647],
    "F1-Score": [0.7258, 0.7275, 0.7679],
    "MCC": [0.5044, 0.5285, 0.5707],
}

# Per-dataset accuracy for each detector
data_dataset_accuracy = {
    "Detector": [
        "[NPR](https://arxiv.org/abs/2312.10461)",
        "[UCF](http://export.arxiv.org/abs/2304.13949)",
        "[CAMO](https://bitmindlabs.notion.site/CAMO-Content-Aware-Model-Orchestration-CAMO-Framework-for-Deepfake-Detection-43ef46a0f9de403abec7a577a45cd075?pvs=74)",
    ],
    "CelebA-HQ": [0.987, 0.995, 0.999],
    "Flickr30k": [0.916, 0.981, 0.979],
    "ImageNet": [0.834, 0.847, 0.831],
    "DiffusionDB": [0.876, 0.85, 0.961],
    "CelebA-HQ-SDXL": [0.386, 0.484, 0.682],
    "CelebA-HQ-Flux": [0.846, 0.794, 0.722],
    "Flickr30k-SDXL": [0.302, 0.256, 0.28],
    "MS-COCO-Flux": [0.588, 0.576, 0.59],
}

# Convert data to DataFrames
df_avg_performance = pd.DataFrame(data_avg_performance)
df_dataset_accuracy = pd.DataFrame(data_dataset_accuracy)
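
# Consistency check (an illustrative addition, not part of the original app):
# F1 is the harmonic mean of precision and recall, F1 = 2PR / (P + R), so the
# stored scores should agree with one another. The tolerance absorbs the
# 4-decimal rounding of the values above.
for p, r, f1 in zip(
    data_avg_performance["Precision"],
    data_avg_performance["Recall"],
    data_avg_performance["F1-Score"],
):
    assert abs(2 * p * r / (p + r) - f1) < 1e-3, (p, r, f1)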

def init_leaderboard():
    if df_avg_performance.empty:
        raise ValueError("Leaderboard DataFrame is empty.")
    # Display the average-performance leaderboard only
    leaderboard = Leaderboard(
        value=df_avg_performance,
        datatype=["str", "number", "number", "number", "number", "number"],
        select_columns=SelectColumns(
            default_selection=["Detector", "Accuracy", "Precision", "Recall", "F1-Score", "MCC"],
            label="Select Columns to Display:",
        ),
        search_columns=["Detector"],
        filter_columns=[
            ColumnFilter("Detector", type="checkboxgroup", label="Detectors"),
        ],
        bool_checkboxgroup_label="Hide detectors",
        interactive=False,
    )
    return leaderboard
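
# Aside (an assumption about the component, not original behavior): the
# "Detector" column holds Markdown links, so declaring its datatype as
# "markdown" rather than "str" should render them as clickable links:
#   datatype=["markdown", "number", "number", "number", "number", "number"]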

# Highlight the maximum value of each column in bold red
def highlight_max(s):
    is_max = s == s.max()
    return ["font-weight: bold; color: red;" if v else "" for v in is_max]


# Style the average-performance dataframe
def style_dataframe(df):
    return df.style.apply(highlight_max, subset=["Accuracy", "Precision", "Recall", "F1-Score", "MCC"])


# Style the dataset-accuracy dataframe (every column except "Detector")
def style_dataset_accuracy(df):
    return df.style.apply(highlight_max, subset=df.columns[1:])
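
# Note: BackgroundScheduler is imported above but never started. Below is a
# minimal sketch of the usual Hugging Face Space pattern it supports; the repo
# id, helper name, and 30-minute interval are assumptions for illustration.
def schedule_space_restart(repo_id="bitmind/dfd-arena"):  # hypothetical repo id
    from huggingface_hub import HfApi  # requires a valid HF token in the environment

    scheduler = BackgroundScheduler()
    scheduler.add_job(lambda: HfApi().restart_space(repo_id=repo_id), "interval", minutes=30)
    scheduler.start()
    return scheduler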

# Gradio demo with the leaderboard and dataframes
demo = gr.Blocks()
with demo:
    # Centered title and welcome message
    gr.HTML("""
    <h1 style="text-align: center;">Deepfake Detection Arena (DFD) Leaderboard</h1>
    """)

    # Description/Intro Section
    gr.Markdown("""
## 🎯 The Open Benchmark for Detecting AI-Generated Images

[DFD-Arena](https://github.com/BitMind-AI/dfd-arena) is the first benchmark to address the open-source computer vision community's need for a *comprehensive evaluation framework* for state-of-the-art (SOTA) detection of AI-generated images.

While [previous studies](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9721302) have benchmarked the SOTA on content-specific subsets of the deepfake detection problem, e.g. human-face deepfake benchmarking via [DeepfakeBench](https://github.com/SCLBD/DeepfakeBench), these benchmarks do not adequately account for the broad spectrum of real and generated image types seen in everyday scenarios.

### 🔍 Explore DFD-Arena
Learn how the framework evaluates detectors on diverse, content-rich images with semantic balance between real and generated data:
- 📂 [Code: DFD-Arena GitHub Repository](https://github.com/BitMind-AI/dfd-arena)
- 📝 [Blog: Technical Write-Up](https://bitmindlabs.notion.site/BitMind-Deepfake-Detection-Arena-106af85402838007830ece5a6f3f35a8?pvs=25)

### ✍️ Authorship
Both DFD-Arena and the novel synthetic image datasets used for evaluation were created by [BitMind](https://www.bitmind.ca/).
- 🐦 [X/Twitter: @BitMindAI](https://x.com/BitMindAI)
""")

    with gr.Tabs():
        with gr.TabItem("🏅 Deepfake Detection Arena", elem_id="dfd-arena-leaderboard-tab"):
            # Average performance metrics
            gr.Markdown("## Average Performance Metrics")
            leaderboard = init_leaderboard()

            # Styled-HTML rendering of the same table, kept for reference:
            # styled_avg_performance = style_dataframe(df_avg_performance)
            # gr.HTML(styled_avg_performance.to_html(), label="Average Performance Metrics")

            # Dataset-specific accuracy (plain table; the highlighted version is commented out below)
            gr.Markdown("## Dataset-specific Accuracy")
            gr.DataFrame(value=df_dataset_accuracy)
            # styled_dataset_accuracy = style_dataset_accuracy(df_dataset_accuracy)
            # gr.HTML(styled_dataset_accuracy.to_html(), label="Dataset-specific Accuracy")

        # with gr.TabItem("📝 About"):
        #     gr.Markdown("This leaderboard evaluates deepfake detection algorithms on various metrics and datasets.")

        # with gr.TabItem("🚀 Submit Detector Results"):
        #     gr.Markdown("Submit your detector results for evaluation.")
        #     # Add submission form elements as needed here

demo.queue(default_concurrency_limit=40).launch()