Kabatubare committed on
Commit 1111e0a · verified · 1 Parent(s): 09457f4

Update app.py

Files changed (1)
  1. app.py +46 -23
app.py CHANGED
@@ -2,49 +2,72 @@ import gradio as gr
 from audioseal import AudioSeal
 import torch
 import torchaudio
+import torchaudio.transforms as T
 import traceback
+import matplotlib.pyplot as plt
+import numpy as np
+import io
+from PIL import Image
 
-def detect_watermark(audio_file_path):
+def plot_spectrogram(waveform, sample_rate):
+    """Plot and return a spectrogram."""
+    spectrogram_transform = T.Spectrogram()
+    spectrogram = spectrogram_transform(waveform)
+    spectrogram_db = torchaudio.transforms.AmplitudeToDB()(spectrogram)
+
+    plt.figure(figsize=(10, 4))
+    plt.imshow(spectrogram_db[0].numpy(), cmap='hot', aspect='auto', origin='lower')
+    plt.colorbar(format='%+2.0f dB')
+    plt.title('Spectrogram')
+    plt.xlabel('Time Frame')
+    plt.ylabel('Frequency')
+
+    buf = io.BytesIO()
+    plt.savefig(buf, format='png')
+    plt.close()
+    buf.seek(0)
+
+    return Image.open(buf)
+
+def detect_watermark(audio_file_path, threshold=0.99):
     try:
-        # Load the audio file and resample if necessary
         waveform, sample_rate = torchaudio.load(audio_file_path)
+
+        # Normalize and resample
+        waveform = waveform / torch.max(torch.abs(waveform))
         if sample_rate != 16000:
-            resample_transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
-            waveform = resample_transform(waveform)
+            resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
+            waveform = resampler(waveform)
             sample_rate = 16000
 
-        # Normalize waveform loudness
-        waveform = torch.clamp(waveform, min=-1.0, max=1.0)
-
-        # Ensure waveform has a batch dimension for processing
         if waveform.ndim < 3:
             waveform = waveform.unsqueeze(0)
 
-        # Initialize the AudioSeal detector
         detector = AudioSeal.load_detector("audioseal_detector_16bits")
-
-        # Detect watermark (simplified to binary outcome for AI-generated or not)
-        result, _ = detector.detect_watermark(waveform, message_threshold=0.99)
+        result, confidence = detector.detect_watermark(waveform, message_threshold=threshold)
 
-        # Simplify the output message
-        if result == 1:  # Assuming '1' means AI-generated
-            detection_result = "The audio is likely AI-generated."
-        else:  # Assuming '0' means human-created
-            detection_result = "The audio is likely human-created."
+        # Visual feedback
+        waveform_image = plot_spectrogram(waveform.squeeze(), sample_rate)
 
-        return detection_result
+        if result:
+            detection_message = f"AI-generated with confidence: {np.mean(confidence.numpy()):.2f}"
+        else:
+            detection_message = "Likely human-generated or the AI watermark is undetectable at the current threshold."
+
+        return detection_message, waveform_image
     except Exception as e:
         error_traceback = traceback.format_exc()
-        return f"Error occurred: {e}\n\n{error_traceback}"
+        return f"Error occurred: {e}\n\n{error_traceback}", None
 
-# Define the Gradio interface
+# Interface with dynamic threshold and visualization
 interface = gr.Interface(
     fn=detect_watermark,
-    inputs=gr.Audio(label="Upload your audio", type="filepath"),
-    outputs="text",
+    inputs=[gr.Audio(label="Upload your audio", type="filepath"), gr.Slider(label="Detection Threshold", minimum=0, maximum=1, value=0.99)],
+    outputs=["text", "image"],
     title="Deep Fake Defender: AI Voice Cloning Detection",
-    description="Upload an audio file to check if it's AI-generated or genuine."
+    description="Upload an audio file to check if it's AI-generated or genuine. Adjust the detection threshold to change sensitivity."
 )
 
 if __name__ == "__main__":
     interface.launch()
+
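
For a quick local check of the updated function outside the Gradio UI, a minimal sketch along these lines should work; it assumes app.py is importable from the working directory, uses a placeholder path "sample.wav", and relies only on the return values defined in the new detect_watermark above (a message string plus a PIL image, or the error text plus None on failure):

# Sketch: exercise the new detect_watermark signature directly, bypassing the Gradio UI.
# Assumes app.py is on the import path; "sample.wav" is a placeholder file name.
from app import detect_watermark

message, spectrogram_img = detect_watermark("sample.wav", threshold=0.99)
print(message)                               # detection message string
if spectrogram_img is not None:              # None is returned together with the error text on failure
    spectrogram_img.save("spectrogram.png")  # PIL image produced by plot_spectrogram

Because interface.launch() is guarded by the __main__ check, importing app this way builds the interface but does not start the web server.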