import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
# Deepfake detector data
data_avg_performance = {
    "Detector": [
        "[NPR](https://arxiv.org/abs/2312.10461)",
        "[UCF](http://export.arxiv.org/abs/2304.13949)",
        "[CAMO](https://bitmindlabs.notion.site/CAMO-Content-Aware-Model-Orchestration-CAMO-Framework-for-Deepfake-Detection-43ef46a0f9de403abec7a577a45cd075?pvs=74)",
    ],
    "Accuracy": [0.7169, 0.7229, 0.7555],
    "Precision": [0.9193, 0.9436, 0.9442],
    "Recall": [0.5996, 0.592, 0.647],
    "F1-Score": [0.7258, 0.7275, 0.7679],
    "MCC": [0.5044, 0.5285, 0.5707],
}
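
# Illustrative consistency check (a sketch added for clarity, not part of the
# original leaderboard logic): F1 is the harmonic mean of precision and recall,
# so each tabulated F1-Score should satisfy F1 ≈ 2PR / (P + R). For CAMO:
# 2 * 0.9442 * 0.647 / (0.9442 + 0.647) ≈ 0.7679, matching the table.
for p, r, f1 in zip(data_avg_performance["Precision"],
                    data_avg_performance["Recall"],
                    data_avg_performance["F1-Score"]):
    assert abs(f1 - 2 * p * r / (p + r)) < 1e-3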

data_dataset_accuracy = {
    "Detector": [
        "[NPR](https://arxiv.org/abs/2312.10461)",
        "[UCF](http://export.arxiv.org/abs/2304.13949)",
        "[CAMO](https://bitmindlabs.notion.site/CAMO-Content-Aware-Model-Orchestration-CAMO-Framework-for-Deepfake-Detection-43ef46a0f9de403abec7a577a45cd075?pvs=74)",
    ],
    "CelebA-HQ": [0.987, 0.995, 0.999],
    "Flickr30k": [0.916, 0.981, 0.979],
    "ImageNet": [0.834, 0.847, 0.831],
    "DiffusionDB": [0.876, 0.85, 0.961],
    "CelebA-HQ-SDXL": [0.386, 0.484, 0.682],
    "CelebA-HQ-Flux": [0.846, 0.794, 0.722],
    "Flickr30k-SDXL": [0.302, 0.256, 0.28],
    "MS-COCO-Flux": [0.588, 0.576, 0.59],
}

# Convert data to DataFrames
df_avg_performance = pd.DataFrame(data_avg_performance)
df_dataset_accuracy = pd.DataFrame(data_dataset_accuracy)
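
# Illustrative relationship between the two tables (a sketch, not original app
# logic): each detector's headline Accuracy is the unweighted mean of its eight
# per-dataset accuracies, e.g. NPR: (0.987 + 0.916 + ... + 0.588) / 8 = 0.7169.
_mean_acc = df_dataset_accuracy.drop(columns=["Detector"]).mean(axis=1)
assert (_mean_acc - df_avg_performance["Accuracy"]).abs().max() < 1e-3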

def init_leaderboard():
    if df_avg_performance.empty:
        raise ValueError("Leaderboard DataFrame is empty.")

    # Display the average-performance leaderboard only
    leaderboard = Leaderboard(
        value=df_avg_performance,
        datatype=["str", "number", "number", "number", "number", "number"],
        select_columns=SelectColumns(
            default_selection=["Detector", "Accuracy", "Precision", "Recall", "F1-Score", "MCC"],
            label="Select Columns to Display:",
        ),
        search_columns=["Detector"],
        filter_columns=[
            ColumnFilter("Detector", type="checkboxgroup", label="Detectors"),
        ],
        bool_checkboxgroup_label="Hide detectors",
        interactive=False,
    )
    return leaderboard

# Highlight the maximum value of each column in bold red
def highlight_max(s):
    is_max = s == s.max()
    return ["font-weight: bold; color: red;" if v else "" for v in is_max]

# Styling helpers. These are only referenced by the commented-out HTML
# rendering in the UI below; the live app currently shows unstyled tables.
def style_dataframe(df):
    return df.style.apply(highlight_max, subset=["Accuracy", "Precision", "Recall", "F1-Score", "MCC"])

def style_dataset_accuracy(df):
    return df.style.apply(highlight_max, subset=df.columns[1:])

# Build the Gradio demo
demo = gr.Blocks()

with demo:
    # Centered title
    gr.HTML("""
        <div style="text-align:center;">
            <h1>Deepfake Detection Arena (DFD) Leaderboard</h1>
        </div>
    """)

    # Description/intro section
    gr.Markdown("""
        ## 🎯 The Open Benchmark for Detecting AI-Generated Images

        [DFD-Arena](https://github.com/BitMind-AI/dfd-arena) is the first benchmark to address the open-source computer vision community's need for a *comprehensive evaluation framework* for state-of-the-art (SOTA) detection of AI-generated images.
        While [previous studies](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9721302) have benchmarked the SOTA on content-specific subsets of the deepfake detection problem, e.g., human-face deepfake benchmarking via [DeepfakeBench](https://github.com/SCLBD/DeepfakeBench), such benchmarks do not adequately cover the broad spectrum of real and generated image types seen in everyday scenarios.

        ### 🔍 Explore DFD-Arena
        Learn how the framework evaluates detectors on diverse, content-rich images with semantic balance between real and generated data:
        - 📂 [Code: DFD-Arena GitHub Repository](https://github.com/BitMind-AI/dfd-arena)
        - 📝 [Blog: Technical Write-Up](https://bitmindlabs.notion.site/BitMind-Deepfake-Detection-Arena-106af85402838007830ece5a6f3f35a8?pvs=25)

        ### ✍️ Authorship
        Both DFD-Arena and the novel synthetic image datasets used for evaluation were created by [BitMind](https://www.bitmind.ca/).
        - 🐦 [X/Twitter: @BitMindAI](https://x.com/BitMindAI)
    """)

    with gr.Tabs():
        with gr.TabItem("🏅 Deepfake Detection Arena", elem_id="dfd-arena-leaderboard-tab"):
            # Average performance metrics
            gr.Markdown("## Average Performance Metrics")
            leaderboard = init_leaderboard()

            # Alternative: render the average-performance metrics as a styled HTML table
            # styled_avg_performance = style_dataframe(df_avg_performance)
            # gr.HTML(styled_avg_performance.to_html(), label="Average Performance Metrics")

            # Dataset-specific accuracy, one column per evaluation dataset
            gr.Markdown("## Dataset-specific Accuracy")
            gr.DataFrame(value=df_dataset_accuracy)
            # styled_dataset_accuracy = style_dataset_accuracy(df_dataset_accuracy)
            # gr.HTML(styled_dataset_accuracy.to_html(), label="Dataset-specific Accuracy")

        # Additional tabs, kept for future use:
        # with gr.TabItem("📝 About"):
        #     gr.Markdown("This leaderboard evaluates deepfake detection algorithms on various metrics and datasets.")
        # with gr.TabItem("🚀 Submit Detector Results"):
        #     gr.Markdown("Submit your detector results for evaluation.")
        #     # Add submission form elements as needed here
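
# The BackgroundScheduler import above is otherwise unused; a periodic job is a
# common pattern in leaderboard Spaces. A minimal sketch, assuming a 30-minute
# refresh interval (the interval and the refresh_leaderboard callback are
# hypothetical, not part of the original app):
def refresh_leaderboard():
    # Placeholder: re-fetch or recompute leaderboard data here.
    pass

scheduler = BackgroundScheduler()
scheduler.add_job(refresh_leaderboard, "interval", minutes=30)
scheduler.start()
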
demo.queue(default_concurrency_limit=40).launch()