import gradio as gr
import torch
import torchaudio
import torchaudio.transforms as T
import traceback
import matplotlib.pyplot as plt
import io
from PIL import Image


try:
    from audioseal import AudioSeal
    print("AudioSeal is available for watermark detection.")
except ImportError as e:
    print(f"AudioSeal could not be imported: {e}")
    traceback.print_exc()
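# AudioSeal is treated as an optional dependency: if the import above fails, the
# app still runs but only returns the spectrogram. The package is published on
# PyPI, so in a typical environment it can be installed with `pip install audioseal`.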
def load_and_resample_audio(audio_file_path, target_sample_rate=16000):
    """Load an audio file and resample it to target_sample_rate if needed."""
    waveform, sample_rate = torchaudio.load(audio_file_path)
    if sample_rate != target_sample_rate:
        resampler = T.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
        waveform = resampler(waveform)
    return waveform, target_sample_rate
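# Usage sketch (file name is hypothetical). The AudioSeal detector used below
# expects 16 kHz input, which is why target_sample_rate defaults to 16000:
#   waveform, sr = load_and_resample_audio("example.wav")
#   # waveform: float tensor of shape (channels, samples), sr == 16000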
def extract_mfcc_features(waveform, sample_rate, n_mfcc=40, n_mels=128, win_length=400, hop_length=160):
    """Compute MFCCs and average them over time, giving one vector per channel."""
    mfcc_transform = T.MFCC(
        sample_rate=sample_rate,
        n_mfcc=n_mfcc,
        melkwargs={
            'n_fft': win_length,
            'n_mels': n_mels,
            'hop_length': hop_length,
            'win_length': win_length
        }
    )
    mfcc = mfcc_transform(waveform)
    return mfcc.mean(dim=2)
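# Note: extract_mfcc_features is a standalone helper and is not called by the
# Gradio flow below. Averaging over dim=2 collapses the time frames, so the
# result is one n_mfcc-dimensional vector per channel, e.g. for mono input:
#   features = extract_mfcc_features(waveform, sr)  # shape: (1, 40)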
def plot_spectrogram(waveform, sample_rate):
    """Render a dB-scaled spectrogram of the waveform and return it as a PIL image."""
    if waveform.ndim == 1:
        waveform = waveform.unsqueeze(0)
    spectrogram = T.Spectrogram()(waveform)
    spectrogram_db = T.AmplitudeToDB()(spectrogram)
    plt.figure(figsize=(10, 4))
    plt.imshow(spectrogram_db[0].numpy(), cmap='hot', aspect='auto', origin='lower')
    plt.axis('off')
    buf = io.BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
    plt.close()
    buf.seek(0)
    return Image.open(buf)
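# plot_spectrogram returns a PIL.Image, which Gradio's "image" output component
# accepts directly. A quick standalone check (output path is hypothetical):
#   img = plot_spectrogram(waveform, sr)
#   img.save("spectrogram.png")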
def detect_watermark(audio_data, sample_rate=16000):
    # Accept either a file path (what the Gradio Audio component passes) or an
    # already-loaded waveform tensor (as used by main() below).
    if audio_data is None:
        return "No audio provided.", None
    if isinstance(audio_data, torch.Tensor):
        waveform, sr = audio_data, sample_rate
    else:
        waveform, sr = load_and_resample_audio(audio_data, target_sample_rate=16000)

    # Plot the first item only, so batched (3-D) input still renders.
    spectrogram_image = plot_spectrogram(waveform[0] if waveform.ndim == 3 else waveform, sr)

    if 'AudioSeal' not in globals():
        return "AudioSeal not available", spectrogram_image

    detector = AudioSeal.load_detector("audioseal_detector_16bits")

    # The detector expects a (batch, channels, time) tensor; results[:, 1, :] holds the
    # per-frame watermark probabilities, and the decoded message bits are unused here.
    batch = waveform if waveform.ndim == 3 else waveform.unsqueeze(0)
    results, messages = detector.forward(batch, sample_rate=sr)
    detect_probs = results[:, 1, :]
    score = detect_probs.mean().cpu().item()
    message = f"Detection result: {'Watermarked Audio' if score > 0.5 else 'Not watermarked'}"

    return message, spectrogram_image
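# Minimal sketch of calling the detector outside the UI (file name is hypothetical);
# the 0.5 threshold above is a conventional cut-off, not tuned here:
#   message, image = detect_watermark("example.wav")
#   print(message)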
def main(audio_file_path):
    waveform, resampled_sr = load_and_resample_audio(audio_file_path)
    plot_spectrogram(waveform, resampled_sr)
    # Split into 5-second chunks, dropping the ragged final chunk so that every
    # item in the batch has the same length, then stack into (batch, 1, time).
    samples_per_batch = 5 * resampled_sr
    audio_batches = torch.split(waveform, samples_per_batch, dim=1)[:-1]
    audio_batched = torch.cat(audio_batches, dim=0).unsqueeze(1)
    message, _ = detect_watermark(audio_batched, resampled_sr)
    print(message)
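# main() is a batched, script-style entry point and is not wired into the Gradio
# interface below; a direct invocation would look like (file name is hypothetical):
#   main("example.wav")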
interface = gr.Interface(
    fn=detect_watermark,
    inputs=gr.Audio(label="Upload your audio", type="filepath"),
    outputs=["text", "image"],
    title="Deep Fake Defender: AudioSeal Watermark Detection",
    description="Analyzes audio to detect AI-generated content."
)
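# The two outputs correspond to the (message, spectrogram_image) tuple returned by
# detect_watermark. To share the demo beyond localhost, launch() also accepts the
# standard Gradio option share=True.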
if __name__ == "__main__":
    interface.launch()