Kimata committed
Commit e50136c (0 parents)

add audio modality
.gitattributes ADDED
@@ -0,0 +1,5 @@
+ efficientnet-b0/ filter=lfs diff=lfs merge=lfs -text
+ efficientnet-b0.zip filter=lfs diff=lfs merge=lfs -text
+ pre_trained_DF_RawNet2.pth filter=lfs diff=lfs merge=lfs -text
+ efficientnet-b0/* filter=lfs diff=lfs merge=lfs -text
+ efficientnet-b0/** filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,37 @@
+ ---
+ title: Deepfakes_Video_Detector
+ emoji: 🔥
+ colorFrom: blue
+ colorTo: gray
+ sdk: gradio
+ app_file: app.py
+ pinned: false
+ ---
+
+ # Configuration
+
+ `title`: _string_
+ Display title for the Space
+
+ `emoji`: _string_
+ Space emoji (emoji-only character allowed)
+
+ `colorFrom`: _string_
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+
+ `colorTo`: _string_
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+
+ `sdk`: _string_
+ Can be either `gradio`, `streamlit`, or `static`
+
+ `sdk_version`: _string_
+ Only applicable for `streamlit` SDK.
+ See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
+
+ `app_file`: _string_
+ Path to your main application file (which contains either `gradio` or `streamlit` Python code, or `static` html code).
+ Path is relative to the root of the repository.
+
+ `pinned`: _boolean_
+ Whether the Space stays on top of your list.
__pycache__/pipeline.cpython-39.pyc ADDED
Binary file (5.84 kB)

__pycache__/rawnet.cpython-39.pyc ADDED
Binary file (9.72 kB)
app.py ADDED
@@ -0,0 +1,35 @@
+ import gradio as gr
+ import pipeline
+
+
+ title = "EfficientNetV2 Deepfakes Video Detector"
+ description = "EfficientNetV2 deepfakes detector using frame-by-frame image detection."
+
+
+ video_interface = gr.Interface(pipeline.deepfakes_video_predict,
+                                gr.Video(),
+                                "text",
+                                examples=["videos/celeb_synthesis.mp4", "videos/real-1.mp4"],
+                                cache_examples=False
+                                )
+
+
+ image_interface = gr.Interface(pipeline.deepfakes_image_predict,
+                                gr.Image(),
+                                "text",
+                                examples=["images/lady.jpg", "images/fake_image.jpg"],
+                                cache_examples=False
+                                )
+
+ audio_interface = gr.Interface(pipeline.deepfakes_audio_predict,
+                                gr.Audio(),
+                                "text",
+                                examples=["audios/DF_E_2000027.flac", "audios/DF_E_2000031.flac"],
+                                cache_examples=False)
+
+
+ app = gr.TabbedInterface(interface_list=[image_interface, video_interface, audio_interface],
+                          tab_names=['Image inference', 'Video inference', 'Audio inference'])
+
+ if __name__ == '__main__':
+     app.launch(share=True)
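The three callbacks wired into the tabs above can also be exercised directly from a Python shell. A minimal sketch, assuming the example media committed alongside app.py are present on disk; note that deepfakes_image_predict expects an image array, while the video and audio functions take file paths:

    # Quick local smoke test of the pipeline callbacks (illustrative, not part of the Space).
    import cv2
    import pipeline

    print(pipeline.deepfakes_video_predict("videos/real-1.mp4"))            # verdict string with confidence
    print(pipeline.deepfakes_image_predict(cv2.imread("images/lady.jpg")))  # image array, as gr.Image provides
    print(pipeline.deepfakes_audio_predict("audios/DF_E_2000027.flac"))     # raw class index from the RawNet2 argmax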
audios/DF_E_2000027.flac ADDED
Binary file (30.3 kB)

audios/DF_E_2000028.flac ADDED
Binary file (29.7 kB)

audios/DF_E_2000031.flac ADDED
Binary file (65.2 kB)

audios/DF_E_2000032.flac ADDED
Binary file (80.3 kB)
efficientnet-b0.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4e4c321c6a075d0d72676a00f3610db80f8dcd04e276af0c2ddf6d88cd9b2596
+ size 22846906
images/fake_image.jpg ADDED
images/lady.jpg ADDED
packages.txt ADDED
@@ -0,0 +1,3 @@
+ ffmpeg
+ libsm6
+ libxext6
pipeline.ipynb ADDED
@@ -0,0 +1,790 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "c:\\Users\\debonair\\anaconda3\\lib\\site-packages\\numpy\\_distributor_init.py:30: UserWarning: loaded more than 1 DLL from .libs:\n",
13
+ "c:\\Users\\debonair\\anaconda3\\lib\\site-packages\\numpy\\.libs\\libopenblas.XWYDX2IKJW2NMTWSFYNGFUWKQU3LYTCZ.gfortran-win_amd64.dll\n",
14
+ "c:\\Users\\debonair\\anaconda3\\lib\\site-packages\\numpy\\.libs\\libopenblas64__v0.3.23-gcc_10_3_0.dll\n",
15
+ " warnings.warn(\"loaded more than 1 DLL from .libs:\"\n",
16
+ "c:\\Users\\debonair\\anaconda3\\lib\\site-packages\\tensorflow_addons\\utils\\tfa_eol_msg.py:23: UserWarning: \n",
17
+ "\n",
18
+ "TensorFlow Addons (TFA) has ended development and introduction of new features.\n",
19
+ "TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.\n",
20
+ "Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). \n",
21
+ "\n",
22
+ "For more information see: https://github.com/tensorflow/addons/issues/2807 \n",
23
+ "\n",
24
+ " warnings.warn(\n"
25
+ ]
26
+ }
27
+ ],
28
+ "source": [
29
+ "import cv2\n",
30
+ "import numpy as np\n",
31
+ "from PIL import Image\n",
32
+ "import tensorflow as tf\n",
33
+ "import tensorflow_addons\n",
34
+ "import moviepy.editor as mp\n",
35
+ "from facenet_pytorch import MTCNN"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": 2,
41
+ "metadata": {},
42
+ "outputs": [
43
+ {
44
+ "ename": "OSError",
45
+ "evalue": "No file or directory found at FINAL-EFFICIENTNETV2-B0",
46
+ "output_type": "error",
47
+ "traceback": [
48
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
49
+ "\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)",
50
+ "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_25172\\3936866724.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mmtcnn\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mMTCNN\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmargin\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m14\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkeep_all\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfactor\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.7\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdevice\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'cpu'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;31m#Load model.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mmodel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mkeras\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodels\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload_model\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"FINAL-EFFICIENTNETV2-B0\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
51
+ "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\keras\\utils\\traceback_utils.py\u001b[0m in \u001b[0;36merror_handler\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 68\u001b[0m \u001b[1;31m# To get the full stack trace, call:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 69\u001b[0m \u001b[1;31m# `tf.debugging.disable_traceback_filtering()`\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 70\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwith_traceback\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfiltered_tb\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 71\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 72\u001b[0m \u001b[1;32mdel\u001b[0m \u001b[0mfiltered_tb\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
52
+ "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python39\\site-packages\\keras\\saving\\save.py\u001b[0m in \u001b[0;36mload_model\u001b[1;34m(filepath, custom_objects, compile, options)\u001b[0m\n\u001b[0;32m 224\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_str\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 225\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mtf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mio\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgfile\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_str\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 226\u001b[1;33m raise IOError(\n\u001b[0m\u001b[0;32m 227\u001b[0m \u001b[1;34mf\"No file or directory found at {filepath_str}\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 228\u001b[0m )\n",
53
+ "\u001b[1;31mOSError\u001b[0m: No file or directory found at FINAL-EFFICIENTNETV2-B0"
54
+ ]
55
+ }
56
+ ],
57
+ "source": [
58
+ "# Load face detector\n",
59
+ "mtcnn = MTCNN(margin=14, keep_all=True, factor=0.7, device='cpu')\n",
60
+ "#Load model.\n",
61
+ "model = tf.keras.models.load_model(\"FINAL-EFFICIENTNETV2-B0\")"
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": null,
67
+ "metadata": {},
68
+ "outputs": [],
69
+ "source": [
70
+ "#Face Detection function, Reference: (Timesler, 2020);\n",
71
+ "class DetectionPipeline:\n",
72
+ " \"\"\"Pipeline class for detecting faces in the frames of a video file.\"\"\"\n",
73
+ "\n",
74
+ " def __init__(self, detector, n_frames=None, batch_size=60, resize=None, input_modality = 'video'):\n",
75
+ " \"\"\"Constructor for DetectionPipeline class.\n",
76
+ "\n",
77
+ " Keyword Arguments:\n",
78
+ " n_frames {int} -- Total number of frames to load. These will be evenly spaced\n",
79
+ " throughout the video. If not specified (i.e., None), all frames will be loaded.\n",
80
+ " (default: {None})\n",
81
+ " batch_size {int} -- Batch size to use with MTCNN face detector. (default: {32})\n",
82
+ " resize {float} -- Fraction by which to resize frames from original prior to face\n",
83
+ " detection. A value less than 1 results in downsampling and a value greater than\n",
84
+ " 1 result in upsampling. (default: {None})\n",
85
+ " \"\"\"\n",
86
+ " self.detector = detector\n",
87
+ " self.n_frames = n_frames\n",
88
+ " self.batch_size = batch_size\n",
89
+ " self.resize = resize\n",
90
+ " self.input_modality = input_modality\n",
91
+ "\n",
92
+ " def __call__(self, filename):\n",
93
+ " \"\"\"Load frames from an MP4 video and detect faces.\n",
94
+ "\n",
95
+ " Arguments:\n",
96
+ " filename {str} -- Path to video.\n",
97
+ " \"\"\"\n",
98
+ " # Create video reader and find length\n",
99
+ " if self.input_modality == 'video':\n",
100
+ " print('Input modality is video.')\n",
101
+ " v_cap = cv2.VideoCapture(filename)\n",
102
+ " v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))\n",
103
+ "\n",
104
+ " # Pick 'n_frames' evenly spaced frames to sample\n",
105
+ " if self.n_frames is None:\n",
106
+ " sample = np.arange(0, v_len)\n",
107
+ " else:\n",
108
+ " sample = np.linspace(0, v_len - 1, self.n_frames).astype(int)\n",
109
+ "\n",
110
+ " # Loop through frames\n",
111
+ " faces = []\n",
112
+ " frames = []\n",
113
+ " for j in range(v_len):\n",
114
+ " success = v_cap.grab()\n",
115
+ " if j in sample:\n",
116
+ " # Load frame\n",
117
+ " success, frame = v_cap.retrieve()\n",
118
+ " if not success:\n",
119
+ " continue\n",
120
+ " frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n",
121
+ " # frame = Image.fromarray(frame)\n",
122
+ "\n",
123
+ " # Resize frame to desired size\n",
124
+ " if self.resize is not None:\n",
125
+ " frame = frame.resize([int(d * self.resize) for d in frame.size])\n",
126
+ " frames.append(frame)\n",
127
+ "\n",
128
+ " # When batch is full, detect faces and reset frame list\n",
129
+ " if len(frames) % self.batch_size == 0 or j == sample[-1]:\n",
130
+ "\n",
131
+ " boxes, probs = self.detector.detect(frames)\n",
132
+ "\n",
133
+ " for i in range(len(frames)):\n",
134
+ "\n",
135
+ " if boxes[i] is None:\n",
136
+ " faces.append(face2) #append previous face frame if no face is detected\n",
137
+ " continue\n",
138
+ "\n",
139
+ " box = boxes[i][0].astype(int)\n",
140
+ " frame = frames[i]\n",
141
+ " face = frame[box[1]:box[3], box[0]:box[2]]\n",
142
+ "\n",
143
+ " if not face.any():\n",
144
+ " faces.append(face2) #append previous face frame if no face is detected\n",
145
+ " continue\n",
146
+ "\n",
147
+ " face2 = cv2.resize(face, (224, 224))\n",
148
+ "\n",
149
+ " faces.append(face2)\n",
150
+ "\n",
151
+ " frames = []\n",
152
+ "\n",
153
+ " v_cap.release()\n",
154
+ " return faces\n",
155
+ "\n",
156
+ " elif self.input_modality == 'image':\n",
157
+ " print('Input modality is image.')\n",
158
+ " #Perform inference for image modality.\n",
159
+ " image = cv2.imread(filename)\n",
160
+ " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n",
161
+ " boxes, probs = self.detector.detect(image)\n",
162
+ "\n",
163
+ " if boxes is None:\n",
164
+ " print('No faces found')\n",
165
+ "\n",
166
+ " box = boxes[0].astype(int)\n",
167
+ " face = image[box[1]:box[3], box[0]:box[2]]\n",
168
+ " face = cv2.resize(face, (224, 224))\n",
169
+ "\n",
170
+ " if not face.any():\n",
171
+ " print(\"No faces found...\")\n",
172
+ "\n",
173
+ " return face\n",
174
+ " \n",
175
+ " else:\n",
176
+ " raise ValueError(\"Invalid input modality. Must be either 'video' or image\")"
177
+ ]
178
+ },
179
+ {
180
+ "cell_type": "code",
181
+ "execution_count": null,
182
+ "metadata": {},
183
+ "outputs": [],
184
+ "source": [
185
+ "detection_video_pipeline = DetectionPipeline(detector=mtcnn, n_frames=2, batch_size=1, input_modality='video')\n",
186
+ "def deepfakes_video_predict(input_video):\n",
187
+ "\n",
188
+ " faces = detection_video_pipeline(input_video)\n",
189
+ " total = 0\n",
190
+ " real = 0\n",
191
+ " fake = 0\n",
192
+ "\n",
193
+ " for face in faces:\n",
194
+ "\n",
195
+ " face2 = face/255\n",
196
+ " pred = model.predict(np.expand_dims(face2, axis=0))[0]\n",
197
+ " total+=1\n",
198
+ "\n",
199
+ " pred2 = pred[1]\n",
200
+ "\n",
201
+ " if pred2 > 0.5:\n",
202
+ " fake+=1\n",
203
+ " else:\n",
204
+ " real+=1\n",
205
+ "\n",
206
+ " fake_ratio = fake/total\n",
207
+ "\n",
208
+ " text =\"\"\n",
209
+ " text2 = \"Deepfakes Confidence: \" + str(fake_ratio*100) + \"%\"\n",
210
+ "\n",
211
+ " if fake_ratio >= 0.5:\n",
212
+ " text = \"The video is FAKE.\"\n",
213
+ " else:\n",
214
+ " text = \"The video is REAL.\"\n",
215
+ "\n",
216
+ " return text, text2\n",
217
+ "\n"
218
+ ]
219
+ },
220
+ {
221
+ "cell_type": "code",
222
+ "execution_count": null,
223
+ "metadata": {},
224
+ "outputs": [],
225
+ "source": [
226
+ "detection_image_pipeline = DetectionPipeline(detector=mtcnn, batch_size = 1, input_modality = 'image')\n",
227
+ "def deepfakes_image_predict(input_image):\n",
228
+ " faces = detection_image_pipeline(input_image)\n",
229
+ " face2 = faces/255\n",
230
+ " pred = model.predict(np.expand_dims(face2, axis = 0))[0]\n",
231
+ " real, fake = pred[0], pred[1]\n",
232
+ " if real > 0.5:\n",
233
+ " text = \"The image is REAL.\"\n",
234
+ " text2 = \"Deepfakes Confidence: \" + str(round(real*100, 3)) + \"%\"\n",
235
+ " else:\n",
236
+ " text = \"The image is FAKE.\"\n",
237
+ " text2 = \"Deepfakes Confidence: \" + str(round(fake*100, 3)) + \"%\"\n",
238
+ " return text, text2"
239
+ ]
240
+ },
241
+ {
242
+ "cell_type": "code",
243
+ "execution_count": null,
244
+ "metadata": {},
245
+ "outputs": [
246
+ {
247
+ "name": "stdout",
248
+ "output_type": "stream",
249
+ "text": [
250
+ "Input modality is image.\n",
251
+ "1/1 [==============================] - 0s 75ms/step\n",
252
+ "('The video is FAKE.', 'Deepfakes Confidence: 99.957%')\n",
253
+ "Input modality is image.\n",
254
+ "1/1 [==============================] - 0s 85ms/step\n",
255
+ "('The video is REAL.', 'Deepfakes Confidence: 99.992%')\n"
256
+ ]
257
+ }
258
+ ],
259
+ "source": [
260
+ "image_res = deepfakes_image_predict('fake_image.jpg')\n",
261
+ "print(image_res)\n",
262
+ "\n",
263
+ "image_res = deepfakes_image_predict('lady.jpg')\n",
264
+ "print(image_res)"
265
+ ]
266
+ },
267
+ {
268
+ "cell_type": "code",
269
+ "execution_count": null,
270
+ "metadata": {},
271
+ "outputs": [
272
+ {
273
+ "name": "stdout",
274
+ "output_type": "stream",
275
+ "text": [
276
+ "Input modality is video.\n",
277
+ "1/1 [==============================] - 0s 80ms/step\n",
278
+ "1/1 [==============================] - 0s 71ms/step\n",
279
+ "('The video is FAKE.', 'Deepfakes Confidence: 100.0%')\n"
280
+ ]
281
+ }
282
+ ],
283
+ "source": [
284
+ "video_dir = 'Video1-fake-1-ff.mp4'\n",
285
+ "videos = deepfakes_video_predict(video_dir)\n",
286
+ "print(videos)"
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "code",
291
+ "execution_count": null,
292
+ "metadata": {},
293
+ "outputs": [
294
+ {
295
+ "name": "stdout",
296
+ "output_type": "stream",
297
+ "text": [
298
+ "Input modality is video.\n",
299
+ "1/1 [==============================] - 0s 82ms/step\n",
300
+ "1/1 [==============================] - 0s 78ms/step\n",
301
+ "('The video is REAL.', 'Deepfakes Confidence: 0.0%')\n"
302
+ ]
303
+ }
304
+ ],
305
+ "source": [
306
+ "video_dir = 'real-1.mp4'\n",
307
+ "videos = deepfakes_video_predict(video_dir)\n",
308
+ "print(videos)"
309
+ ]
310
+ },
311
+ {
312
+ "attachments": {},
313
+ "cell_type": "markdown",
314
+ "metadata": {},
315
+ "source": [
316
+ "### Audio modality pipeline."
317
+ ]
318
+ },
319
+ {
320
+ "cell_type": "code",
321
+ "execution_count": 45,
322
+ "metadata": {},
323
+ "outputs": [],
324
+ "source": [
325
+ "#Load model.\n",
326
+ "import torch \n",
327
+ "import torch.nn as nn\n",
328
+ "import torch.nn.functional as F\n",
329
+ "from rawnet import SincConv, Residual_block\n",
330
+ "\n",
331
+ "\n",
332
+ "\n",
333
+ "d_args = {\n",
334
+ " \"nb_samp\": 64600,\n",
335
+ " \"first_conv\": 1024,\n",
336
+ " \"in_channels\": 1,\n",
337
+ " \"filts\": [20, [20, 20], [20, 128], [128, 128]],\n",
338
+ " \"blocks\": [2, 4],\n",
339
+ " \"nb_fc_node\": 1024,\n",
340
+ " \"gru_node\": 1024,\n",
341
+ " \"nb_gru_layer\": 3,\n",
342
+ " \"nb_classes\": 2}\n",
343
+ "\n",
344
+ "\n",
345
+ "class RawNet(nn.Module):\n",
346
+ " def __init__(self, d_args, device):\n",
347
+ " super(RawNet, self).__init__()\n",
348
+ "\n",
349
+ " \n",
350
+ " self.device=device\n",
351
+ "\n",
352
+ " self.Sinc_conv=SincConv(device=self.device,\n",
353
+ "\t\t\tout_channels = d_args['filts'][0],\n",
354
+ "\t\t\tkernel_size = d_args['first_conv'],\n",
355
+ " in_channels = d_args['in_channels']\n",
356
+ " )\n",
357
+ " \n",
358
+ " self.first_bn = nn.BatchNorm1d(num_features = d_args['filts'][0])\n",
359
+ " self.selu = nn.SELU(inplace=True)\n",
360
+ " self.block0 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][1], first = True))\n",
361
+ " self.block1 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][1]))\n",
362
+ " self.block2 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][2]))\n",
363
+ " d_args['filts'][2][0] = d_args['filts'][2][1]\n",
364
+ " self.block3 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][2]))\n",
365
+ " self.block4 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][2]))\n",
366
+ " self.block5 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][2]))\n",
367
+ " self.avgpool = nn.AdaptiveAvgPool1d(1)\n",
368
+ "\n",
369
+ " self.fc_attention0 = self._make_attention_fc(in_features = d_args['filts'][1][-1],\n",
370
+ " l_out_features = d_args['filts'][1][-1])\n",
371
+ " self.fc_attention1 = self._make_attention_fc(in_features = d_args['filts'][1][-1],\n",
372
+ " l_out_features = d_args['filts'][1][-1])\n",
373
+ " self.fc_attention2 = self._make_attention_fc(in_features = d_args['filts'][2][-1],\n",
374
+ " l_out_features = d_args['filts'][2][-1])\n",
375
+ " self.fc_attention3 = self._make_attention_fc(in_features = d_args['filts'][2][-1],\n",
376
+ " l_out_features = d_args['filts'][2][-1])\n",
377
+ " self.fc_attention4 = self._make_attention_fc(in_features = d_args['filts'][2][-1],\n",
378
+ " l_out_features = d_args['filts'][2][-1])\n",
379
+ " self.fc_attention5 = self._make_attention_fc(in_features = d_args['filts'][2][-1],\n",
380
+ " l_out_features = d_args['filts'][2][-1])\n",
381
+ "\n",
382
+ " self.bn_before_gru = nn.BatchNorm1d(num_features = d_args['filts'][2][-1])\n",
383
+ " self.gru = nn.GRU(input_size = d_args['filts'][2][-1],\n",
384
+ "\t\t\thidden_size = d_args['gru_node'],\n",
385
+ "\t\t\tnum_layers = d_args['nb_gru_layer'],\n",
386
+ "\t\t\tbatch_first = True)\n",
387
+ "\n",
388
+ " \n",
389
+ " self.fc1_gru = nn.Linear(in_features = d_args['gru_node'],\n",
390
+ "\t\t\tout_features = d_args['nb_fc_node'])\n",
391
+ " \n",
392
+ " self.fc2_gru = nn.Linear(in_features = d_args['nb_fc_node'],\n",
393
+ "\t\t\tout_features = d_args['nb_classes'],bias=True)\n",
394
+ "\t\t\t\n",
395
+ " \n",
396
+ " self.sig = nn.Sigmoid()\n",
397
+ " self.logsoftmax = nn.LogSoftmax(dim=1)\n",
398
+ " \n",
399
+ " def forward(self, x, y = None):\n",
400
+ " \n",
401
+ " \n",
402
+ " nb_samp = x.shape[0]\n",
403
+ " len_seq = x.shape[1]\n",
404
+ " x=x.view(nb_samp,1,len_seq)\n",
405
+ " \n",
406
+ " x = self.Sinc_conv(x) \n",
407
+ " x = F.max_pool1d(torch.abs(x), 3)\n",
408
+ " x = self.first_bn(x)\n",
409
+ " x = self.selu(x)\n",
410
+ " \n",
411
+ " x0 = self.block0(x)\n",
412
+ " y0 = self.avgpool(x0).view(x0.size(0), -1) # torch.Size([batch, filter])\n",
413
+ " y0 = self.fc_attention0(y0)\n",
414
+ " y0 = self.sig(y0).view(y0.size(0), y0.size(1), -1) # torch.Size([batch, filter, 1])\n",
415
+ " x = x0 * y0 + y0 # (batch, filter, time) x (batch, filter, 1)\n",
416
+ " \n",
417
+ "\n",
418
+ " x1 = self.block1(x)\n",
419
+ " y1 = self.avgpool(x1).view(x1.size(0), -1) # torch.Size([batch, filter])\n",
420
+ " y1 = self.fc_attention1(y1)\n",
421
+ " y1 = self.sig(y1).view(y1.size(0), y1.size(1), -1) # torch.Size([batch, filter, 1])\n",
422
+ " x = x1 * y1 + y1 # (batch, filter, time) x (batch, filter, 1)\n",
423
+ "\n",
424
+ " x2 = self.block2(x)\n",
425
+ " y2 = self.avgpool(x2).view(x2.size(0), -1) # torch.Size([batch, filter])\n",
426
+ " y2 = self.fc_attention2(y2)\n",
427
+ " y2 = self.sig(y2).view(y2.size(0), y2.size(1), -1) # torch.Size([batch, filter, 1])\n",
428
+ " x = x2 * y2 + y2 # (batch, filter, time) x (batch, filter, 1)\n",
429
+ "\n",
430
+ " x3 = self.block3(x)\n",
431
+ " y3 = self.avgpool(x3).view(x3.size(0), -1) # torch.Size([batch, filter])\n",
432
+ " y3 = self.fc_attention3(y3)\n",
433
+ " y3 = self.sig(y3).view(y3.size(0), y3.size(1), -1) # torch.Size([batch, filter, 1])\n",
434
+ " x = x3 * y3 + y3 # (batch, filter, time) x (batch, filter, 1)\n",
435
+ "\n",
436
+ " x4 = self.block4(x)\n",
437
+ " y4 = self.avgpool(x4).view(x4.size(0), -1) # torch.Size([batch, filter])\n",
438
+ " y4 = self.fc_attention4(y4)\n",
439
+ " y4 = self.sig(y4).view(y4.size(0), y4.size(1), -1) # torch.Size([batch, filter, 1])\n",
440
+ " x = x4 * y4 + y4 # (batch, filter, time) x (batch, filter, 1)\n",
441
+ "\n",
442
+ " x5 = self.block5(x)\n",
443
+ " y5 = self.avgpool(x5).view(x5.size(0), -1) # torch.Size([batch, filter])\n",
444
+ " y5 = self.fc_attention5(y5)\n",
445
+ " y5 = self.sig(y5).view(y5.size(0), y5.size(1), -1) # torch.Size([batch, filter, 1])\n",
446
+ " x = x5 * y5 + y5 # (batch, filter, time) x (batch, filter, 1)\n",
447
+ "\n",
448
+ " x = self.bn_before_gru(x)\n",
449
+ " x = self.selu(x)\n",
450
+ " x = x.permute(0, 2, 1) #(batch, filt, time) >> (batch, time, filt)\n",
451
+ " self.gru.flatten_parameters()\n",
452
+ " x, _ = self.gru(x)\n",
453
+ " x = x[:,-1,:]\n",
454
+ " x = self.fc1_gru(x)\n",
455
+ " x = self.fc2_gru(x)\n",
456
+ " output=self.logsoftmax(x)\n",
457
+ " \n",
458
+ " return output\n",
459
+ " \n",
460
+ " \n",
461
+ "\n",
462
+ " def _make_attention_fc(self, in_features, l_out_features):\n",
463
+ "\n",
464
+ " l_fc = []\n",
465
+ " \n",
466
+ " l_fc.append(nn.Linear(in_features = in_features,\n",
467
+ "\t\t\t out_features = l_out_features))\n",
468
+ "\n",
469
+ " \n",
470
+ "\n",
471
+ " return nn.Sequential(*l_fc)\n",
472
+ "\n",
473
+ "\n",
474
+ " def _make_layer(self, nb_blocks, nb_filts, first = False):\n",
475
+ " layers = []\n",
476
+ " #def __init__(self, nb_filts, first = False):\n",
477
+ " for i in range(nb_blocks):\n",
478
+ " first = first if i == 0 else False\n",
479
+ " layers.append(Residual_block(nb_filts = nb_filts,\n",
480
+ "\t\t\t\tfirst = first))\n",
481
+ " if i == 0: nb_filts[0] = nb_filts[1]\n",
482
+ " \n",
483
+ " return nn.Sequential(*layers)\n",
484
+ "\n",
485
+ " def summary(self, input_size, batch_size=-1, device=\"cuda\", print_fn = None):\n",
486
+ " if print_fn == None: printfn = print\n",
487
+ " model = self\n",
488
+ " \n",
489
+ " def register_hook(module):\n",
490
+ " def hook(module, input, output):\n",
491
+ " class_name = str(module.__class__).split(\".\")[-1].split(\"'\")[0]\n",
492
+ " module_idx = len(summary)\n",
493
+ " \n",
494
+ " m_key = \"%s-%i\" % (class_name, module_idx + 1)\n",
495
+ " summary[m_key] = OrderedDict()\n",
496
+ " summary[m_key][\"input_shape\"] = list(input[0].size())\n",
497
+ " summary[m_key][\"input_shape\"][0] = batch_size\n",
498
+ " if isinstance(output, (list, tuple)):\n",
499
+ " summary[m_key][\"output_shape\"] = [\n",
500
+ "\t\t\t\t\t\t[-1] + list(o.size())[1:] for o in output\n",
501
+ "\t\t\t\t\t]\n",
502
+ " else:\n",
503
+ " summary[m_key][\"output_shape\"] = list(output.size())\n",
504
+ " if len(summary[m_key][\"output_shape\"]) != 0:\n",
505
+ " summary[m_key][\"output_shape\"][0] = batch_size\n",
506
+ " \n",
507
+ " params = 0\n",
508
+ " if hasattr(module, \"weight\") and hasattr(module.weight, \"size\"):\n",
509
+ " params += torch.prod(torch.LongTensor(list(module.weight.size())))\n",
510
+ " summary[m_key][\"trainable\"] = module.weight.requires_grad\n",
511
+ " if hasattr(module, \"bias\") and hasattr(module.bias, \"size\"):\n",
512
+ " params += torch.prod(torch.LongTensor(list(module.bias.size())))\n",
513
+ " summary[m_key][\"nb_params\"] = params\n",
514
+ " \n",
515
+ " if (\n",
516
+ "\t\t\t\tnot isinstance(module, nn.Sequential)\n",
517
+ "\t\t\t\tand not isinstance(module, nn.ModuleList)\n",
518
+ "\t\t\t\tand not (module == model)\n",
519
+ "\t\t\t):\n",
520
+ " hooks.append(module.register_forward_hook(hook))\n",
521
+ " \n",
522
+ " device = device.lower()\n",
523
+ " assert device in [\n",
524
+ "\t\t\t\"cuda\",\n",
525
+ "\t\t\t\"cpu\",\n",
526
+ "\t\t], \"Input device is not valid, please specify 'cuda' or 'cpu'\"\n",
527
+ " \n",
528
+ " if device == \"cuda\" and torch.cuda.is_available():\n",
529
+ " dtype = torch.cuda.FloatTensor\n",
530
+ " else:\n",
531
+ " dtype = torch.FloatTensor\n",
532
+ " if isinstance(input_size, tuple):\n",
533
+ " input_size = [input_size]\n",
534
+ " x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]\n",
535
+ " summary = OrderedDict()\n",
536
+ " hooks = []\n",
537
+ " model.apply(register_hook)\n",
538
+ " model(*x)\n",
539
+ " for h in hooks:\n",
540
+ " h.remove()\n",
541
+ " \n",
542
+ " print_fn(\"----------------------------------------------------------------\")\n",
543
+ " line_new = \"{:>20} {:>25} {:>15}\".format(\"Layer (type)\", \"Output Shape\", \"Param #\")\n",
544
+ " print_fn(line_new)\n",
545
+ " print_fn(\"================================================================\")\n",
546
+ " total_params = 0\n",
547
+ " total_output = 0\n",
548
+ " trainable_params = 0\n",
549
+ " for layer in summary:\n",
550
+ " # input_shape, output_shape, trainable, nb_params\n",
551
+ " line_new = \"{:>20} {:>25} {:>15}\".format(\n",
552
+ "\t\t\t\tlayer,\n",
553
+ "\t\t\t\tstr(summary[layer][\"output_shape\"]),\n",
554
+ "\t\t\t\t\"{0:,}\".format(summary[layer][\"nb_params\"]),\n",
555
+ "\t\t\t)\n",
556
+ " total_params += summary[layer][\"nb_params\"]\n",
557
+ " total_output += np.prod(summary[layer][\"output_shape\"])\n",
558
+ " if \"trainable\" in summary[layer]:\n",
559
+ " if summary[layer][\"trainable\"] == True:\n",
560
+ " trainable_params += summary[layer][\"nb_params\"]\n",
561
+ " print_fn(line_new)\n"
562
+ ]
563
+ },
564
+ {
565
+ "cell_type": "code",
566
+ "execution_count": 46,
567
+ "metadata": {},
568
+ "outputs": [
569
+ {
570
+ "data": {
571
+ "text/plain": [
572
+ "<All keys matched successfully>"
573
+ ]
574
+ },
575
+ "execution_count": 46,
576
+ "metadata": {},
577
+ "output_type": "execute_result"
578
+ }
579
+ ],
580
+ "source": [
581
+ "model = RawNet(d_args = d_args, device = 'cpu')\n",
582
+ "model_dict = model.state_dict()\n",
583
+ "ckpt = torch.load('pre_trained_DF_RawNet2.pth', map_location = torch.device('cpu'))\n",
584
+ "model.load_state_dict(ckpt, model_dict)"
585
+ ]
586
+ },
587
+ {
588
+ "cell_type": "code",
589
+ "execution_count": 47,
590
+ "metadata": {},
591
+ "outputs": [],
592
+ "source": [
593
+ "import librosa"
594
+ ]
595
+ },
596
+ {
597
+ "cell_type": "code",
598
+ "execution_count": 48,
599
+ "metadata": {},
600
+ "outputs": [],
601
+ "source": [
602
+ "def load_and_preprocess_audio(audio_path):\n",
603
+ " '''Loads and returns a torch tensor object'''\n",
604
+ " x, sr = librosa.load(audio_path)\n",
605
+ " x_pt = torch.Tensor(x)\n",
606
+ " x_pt = torch.unsqueeze(x_pt, dim = 0)\n",
607
+ " return x_pt"
608
+ ]
609
+ },
610
+ {
611
+ "cell_type": "code",
612
+ "execution_count": 49,
613
+ "metadata": {},
614
+ "outputs": [
615
+ {
616
+ "data": {
617
+ "text/plain": [
618
+ "tensor([[2.5792e-05, 3.1405e-05, 4.5405e-05, ..., 0.0000e+00, 0.0000e+00,\n",
619
+ " 0.0000e+00]])"
620
+ ]
621
+ },
622
+ "execution_count": 49,
623
+ "metadata": {},
624
+ "output_type": "execute_result"
625
+ }
626
+ ],
627
+ "source": [
628
+ "ds = load_and_preprocess_audio(audio_path = 'audios/DF_E_2000027.flac')\n",
629
+ "ds"
630
+ ]
631
+ },
632
+ {
633
+ "cell_type": "code",
634
+ "execution_count": 50,
635
+ "metadata": {},
636
+ "outputs": [],
637
+ "source": [
638
+ "grads = model(ds)"
639
+ ]
640
+ },
641
+ {
642
+ "cell_type": "code",
643
+ "execution_count": null,
644
+ "metadata": {},
645
+ "outputs": [
646
+ {
647
+ "data": {
648
+ "text/plain": [
649
+ "tensor([[-6.5565e-06, -1.1934e+01]], grad_fn=<LogSoftmaxBackward0>)"
650
+ ]
651
+ },
652
+ "execution_count": 39,
653
+ "metadata": {},
654
+ "output_type": "execute_result"
655
+ }
656
+ ],
657
+ "source": [
658
+ "grads"
659
+ ]
660
+ },
661
+ {
662
+ "cell_type": "code",
663
+ "execution_count": 78,
664
+ "metadata": {},
665
+ "outputs": [],
666
+ "source": [
667
+ "batch = grads[:, 1].data.cpu().numpy().ravel()"
668
+ ]
669
+ },
670
+ {
671
+ "cell_type": "code",
672
+ "execution_count": 79,
673
+ "metadata": {},
674
+ "outputs": [
675
+ {
676
+ "data": {
677
+ "text/plain": [
678
+ "array([-11.933539], dtype=float32)"
679
+ ]
680
+ },
681
+ "execution_count": 79,
682
+ "metadata": {},
683
+ "output_type": "execute_result"
684
+ }
685
+ ],
686
+ "source": [
687
+ "batch"
688
+ ]
689
+ },
690
+ {
691
+ "cell_type": "code",
692
+ "execution_count": 82,
693
+ "metadata": {},
694
+ "outputs": [],
695
+ "source": [
696
+ "_, batch_pred = grads.max(dim=1)"
697
+ ]
698
+ },
699
+ {
700
+ "cell_type": "code",
701
+ "execution_count": 83,
702
+ "metadata": {},
703
+ "outputs": [
704
+ {
705
+ "data": {
706
+ "text/plain": [
707
+ "tensor([0])"
708
+ ]
709
+ },
710
+ "execution_count": 83,
711
+ "metadata": {},
712
+ "output_type": "execute_result"
713
+ }
714
+ ],
715
+ "source": [
716
+ "batch_pred"
717
+ ]
718
+ },
719
+ {
720
+ "cell_type": "code",
721
+ "execution_count": 58,
722
+ "metadata": {},
723
+ "outputs": [
724
+ {
725
+ "data": {
726
+ "text/plain": [
727
+ "1"
728
+ ]
729
+ },
730
+ "execution_count": 58,
731
+ "metadata": {},
732
+ "output_type": "execute_result"
733
+ }
734
+ ],
735
+ "source": [
736
+ "res = np.argmin(grads.detach().numpy())\n",
737
+ "res"
738
+ ]
739
+ },
740
+ {
741
+ "cell_type": "code",
742
+ "execution_count": 63,
743
+ "metadata": {},
744
+ "outputs": [
745
+ {
746
+ "data": {
747
+ "text/plain": [
748
+ "-11.933546"
749
+ ]
750
+ },
751
+ "execution_count": 63,
752
+ "metadata": {},
753
+ "output_type": "execute_result"
754
+ }
755
+ ],
756
+ "source": [
757
+ "grads.detach().numpy()[0][0] + grads.detach().numpy()[0][1]"
758
+ ]
759
+ },
760
+ {
761
+ "cell_type": "code",
762
+ "execution_count": null,
763
+ "metadata": {},
764
+ "outputs": [],
765
+ "source": []
766
+ }
767
+ ],
768
+ "metadata": {
769
+ "kernelspec": {
770
+ "display_name": "base",
771
+ "language": "python",
772
+ "name": "python3"
773
+ },
774
+ "language_info": {
775
+ "codemirror_mode": {
776
+ "name": "ipython",
777
+ "version": 3
778
+ },
779
+ "file_extension": ".py",
780
+ "mimetype": "text/x-python",
781
+ "name": "python",
782
+ "nbconvert_exporter": "python",
783
+ "pygments_lexer": "ipython3",
784
+ "version": "3.9.13"
785
+ },
786
+ "orig_nbformat": 4
787
+ },
788
+ "nbformat": 4,
789
+ "nbformat_minor": 2
790
+ }
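The last cells above inspect the raw RawNet2 output, which is a LogSoftmax over two classes. A small sketch of turning that output into per-class probabilities, reusing the `model` and `ds` objects defined earlier in the notebook:

    import torch

    log_probs = model(ds)                  # shape (1, 2): per-class log-probabilities
    probs = torch.exp(log_probs)           # LogSoftmax output -> probabilities summing to 1
    pred_class = int(probs.argmax(dim=1))  # index of the more likely class
    print(probs, pred_class)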
pipeline.py ADDED
@@ -0,0 +1,223 @@
+ import os
+ import cv2
+ import torch
+ import zipfile
+ import librosa
+ import numpy as np
+ import tensorflow as tf
+ import tensorflow_addons
+ from facenet_pytorch import MTCNN
+ from rawnet import RawNet
+
+ local_zip = "./efficientnet-b0.zip"
+ zip_ref = zipfile.ZipFile(local_zip, 'r')
+ zip_ref.extractall()
+ zip_ref.close()
+
+
+ # Load models.
+ mtcnn = MTCNN(margin=14, keep_all=True, factor=0.7, device='cpu')
+ model = tf.keras.models.load_model("efficientnet-b0/")
+
+
+ class DetectionPipeline:
+     """Pipeline class for detecting faces in the frames of a video file."""
+
+     def __init__(self, detector, n_frames=None, batch_size=60, resize=None, input_modality='video'):
+         """Constructor for DetectionPipeline class.
+
+         Keyword Arguments:
+             n_frames {int} -- Total number of frames to load. These will be evenly spaced
+                 throughout the video. If not specified (i.e., None), all frames will be loaded.
+                 (default: {None})
+             batch_size {int} -- Batch size to use with the MTCNN face detector. (default: {60})
+             resize {float} -- Fraction by which to resize frames from the original prior to face
+                 detection. A value less than 1 results in downsampling and a value greater than
+                 1 results in upsampling. (default: {None})
+             input_modality {str} -- One of 'video', 'image' or 'audio'. (default: {'video'})
+         """
+         self.detector = detector
+         self.n_frames = n_frames
+         self.batch_size = batch_size
+         self.resize = resize
+         self.input_modality = input_modality
+
+     def __call__(self, filename):
+         """Load the input for the configured modality.
+
+         Arguments:
+             filename -- Path to the video/audio file, or an image array for the image modality.
+
+         Returns face crops (video), a single face crop (image), or a waveform tensor (audio).
+         """
+         if self.input_modality == 'video':
+             print('Input modality is video.')
+             # Create video reader and find length
+             v_cap = cv2.VideoCapture(filename)
+             v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+             # Pick 'n_frames' evenly spaced frames to sample
+             if self.n_frames is None:
+                 sample = np.arange(0, v_len)
+             else:
+                 sample = np.linspace(0, v_len - 1, self.n_frames).astype(int)
+
+             # Loop through frames
+             faces = []
+             frames = []
+             for j in range(v_len):
+                 success = v_cap.grab()
+                 if j in sample:
+                     # Load frame
+                     success, frame = v_cap.retrieve()
+                     if not success:
+                         continue
+                     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+                     # Resize frame to desired size
+                     if self.resize is not None:
+                         frame = cv2.resize(frame, (int(frame.shape[1] * self.resize), int(frame.shape[0] * self.resize)))
+                     frames.append(frame)
+
+                     # When batch is full, detect faces and reset frame list
+                     if len(frames) % self.batch_size == 0 or j == sample[-1]:
+
+                         boxes, _ = self.detector.detect(frames)
+
+                         for i in range(len(frames)):
+                             if boxes[i] is None:
+                                 faces.append(face2)  # append previous face frame if no face is detected
+                                 continue
+
+                             box = boxes[i][0].astype(int)
+                             frame = frames[i]
+                             face = frame[box[1]:box[3], box[0]:box[2]]
+
+                             if not face.any():
+                                 faces.append(face2)  # append previous face frame if no face is detected
+                                 continue
+
+                             face2 = cv2.resize(face, (224, 224))
+                             faces.append(face2)
+                         frames = []
+             v_cap.release()
+             return faces
+
+         elif self.input_modality == 'image':
+             print('Input modality is image.')
+             # Perform inference for image modality. Gradio passes the image in as an array.
+             print('Reading image')
+             image = cv2.cvtColor(filename, cv2.COLOR_BGR2RGB)
+             boxes, _ = self.detector.detect(image)
+
+             if boxes is None:
+                 print('No faces found')
+
+             box = boxes[0].astype(int)
+             face = image[box[1]:box[3], box[0]:box[2]]
+             face = cv2.resize(face, (224, 224))
+
+             if not face.any():
+                 print("No faces found...")
+
+             return face
+
+         elif self.input_modality == 'audio':
+             print("Input modality is audio.")
+
+             # Load audio.
+             x, sr = librosa.load(filename)
+             x_pt = torch.Tensor(x)
+             x_pt = torch.unsqueeze(x_pt, dim=0)
+             return x_pt
+
+         else:
+             raise ValueError("Invalid input modality. Must be 'video', 'image' or 'audio'.")
+
+
+ detection_video_pipeline = DetectionPipeline(detector=mtcnn, n_frames=5, batch_size=1, input_modality='video')
+ detection_image_pipeline = DetectionPipeline(detector=mtcnn, batch_size=1, input_modality='image')
+
+
+ def deepfakes_video_predict(input_video):
+
+     faces = detection_video_pipeline(input_video)
+     real_res = []
+     fake_res = []
+
+     for face in faces:
+         face2 = face / 255
+         pred = model.predict(np.expand_dims(face2, axis=0))[0]
+         real, fake = pred[0], pred[1]
+         real_res.append(real)
+         fake_res.append(fake)
+
+     real_mean = np.mean(real_res)
+     fake_mean = np.mean(fake_res)
+     print(f"Real Faces: {real_mean}")
+     print(f"Fake Faces: {fake_mean}")
+
+     if real_mean >= 0.5:
+         text = "The video is REAL. \n Deepfakes Confidence: " + str(round(100 - (real_mean*100), 3)) + "%"
+     else:
+         text = "The video is FAKE. \n Deepfakes Confidence: " + str(round(fake_mean*100, 3)) + "%"
+
+     return text
+
+
+ def deepfakes_image_predict(input_image):
+     faces = detection_image_pipeline(input_image)
+     face2 = faces / 255
+     pred = model.predict(np.expand_dims(face2, axis=0))[0]
+     real, fake = pred[0], pred[1]
+     if real > 0.5:
+         text2 = "The image is REAL. \n Deepfakes Confidence: " + str(round(100 - (real*100), 3)) + "%"
+     else:
+         text2 = "The image is FAKE. \n Deepfakes Confidence: " + str(round(fake*100, 3)) + "%"
+     return text2
+
+
+ def load_audio_model():
+     d_args = {
+         "nb_samp": 64600,
+         "first_conv": 1024,
+         "in_channels": 1,
+         "filts": [20, [20, 20], [20, 128], [128, 128]],
+         "blocks": [2, 4],
+         "nb_fc_node": 1024,
+         "gru_node": 1024,
+         "nb_gru_layer": 3,
+         "nb_classes": 2}
+
+     model = RawNet(d_args=d_args, device='cpu')
+
+     # Load pretrained ckpt. load_state_dict mutates the model in place,
+     # so do not rebind `model` to its return value.
+     ckpt = torch.load('pre_trained_DF_RawNet2.pth', map_location=torch.device('cpu'))
+     model.load_state_dict(ckpt)
+     return model
+
+
+ def deepfakes_audio_predict(input_audio):
+     # Perform inference on audio.
+     x, sr = librosa.load(input_audio)
+     x_pt = torch.Tensor(x)
+     x_pt = torch.unsqueeze(x_pt, dim=0)
+
+     # Load model.
+     model = load_audio_model()
+
+     # Perform inference.
+     grads = model(x_pt)
+
+     # Get the argmax.
+     grads_np = grads.detach().numpy()
+     result = np.argmax(grads_np)
+     return result
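Note that deepfakes_audio_predict returns the raw argmax index rather than a verdict string like the image and video functions, so the Gradio text output will display 0 or 1. A possible wrapper is sketched below; the 1 = real / 0 = fake mapping is an assumption about the pre-trained checkpoint's class order and should be verified against known samples:

    # Hypothetical wrapper mapping the class index to a message; the label order is an assumption.
    def deepfakes_audio_predict_text(input_audio):
        idx = deepfakes_audio_predict(input_audio)
        return "The audio is REAL." if idx == 1 else "The audio is FAKE."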
pre_trained_DF_RawNet2.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:52d8ad5f524a0f600c7c876d7a157a8f06c44a03504d0b2795c852f5e42c9127
+ size 70515422
rawnet.py ADDED
@@ -0,0 +1,363 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torch import Tensor
5
+ import numpy as np
6
+ from torch.utils import data
7
+ from collections import OrderedDict
8
+ from torch.nn.parameter import Parameter
9
+
10
+
11
+ __author__ = "Hemlata Tak"
12
+ __email__ = "[email protected]"
13
+
14
+
15
+ class SincConv(nn.Module):
16
+ @staticmethod
17
+ def to_mel(hz):
18
+ return 2595 * np.log10(1 + hz / 700)
19
+
20
+ @staticmethod
21
+ def to_hz(mel):
22
+ return 700 * (10 ** (mel / 2595) - 1)
23
+
24
+
25
+ def __init__(self, device,out_channels, kernel_size,in_channels=1,sample_rate=16000,
26
+ stride=1, padding=0, dilation=1, bias=False, groups=1):
27
+
28
+ super(SincConv,self).__init__()
29
+
30
+ if in_channels != 1:
31
+
32
+ msg = "SincConv only support one input channel (here, in_channels = {%i})" % (in_channels)
33
+ raise ValueError(msg)
34
+
35
+ self.out_channels = out_channels
36
+ self.kernel_size = kernel_size
37
+ self.sample_rate=sample_rate
38
+
39
+ # Forcing the filters to be odd (i.e, perfectly symmetrics)
40
+ if kernel_size%2==0:
41
+ self.kernel_size=self.kernel_size+1
42
+
43
+ self.device=device
44
+ self.stride = stride
45
+ self.padding = padding
46
+ self.dilation = dilation
47
+
48
+ if bias:
49
+ raise ValueError('SincConv does not support bias.')
50
+ if groups > 1:
51
+ raise ValueError('SincConv does not support groups.')
52
+
53
+
54
+ # initialize filterbanks using Mel scale
55
+ NFFT = 512
56
+ f=int(self.sample_rate/2)*np.linspace(0,1,int(NFFT/2)+1)
57
+ fmel=self.to_mel(f) # Hz to mel conversion
58
+ fmelmax=np.max(fmel)
59
+ fmelmin=np.min(fmel)
60
+ filbandwidthsmel=np.linspace(fmelmin,fmelmax,self.out_channels+1)
61
+ filbandwidthsf=self.to_hz(filbandwidthsmel) # Mel to Hz conversion
62
+ self.mel=filbandwidthsf
63
+ self.hsupp=torch.arange(-(self.kernel_size-1)/2, (self.kernel_size-1)/2+1)
64
+ self.band_pass=torch.zeros(self.out_channels,self.kernel_size)
65
+
66
+
67
+
68
+ def forward(self,x):
69
+ for i in range(len(self.mel)-1):
70
+ fmin=self.mel[i]
71
+ fmax=self.mel[i+1]
72
+ hHigh=(2*fmax/self.sample_rate)*np.sinc(2*fmax*self.hsupp/self.sample_rate)
73
+ hLow=(2*fmin/self.sample_rate)*np.sinc(2*fmin*self.hsupp/self.sample_rate)
74
+ hideal=hHigh-hLow
75
+
76
+ self.band_pass[i,:]=Tensor(np.hamming(self.kernel_size))*Tensor(hideal)
77
+
78
+ band_pass_filter=self.band_pass.to(self.device)
79
+
80
+ self.filters = (band_pass_filter).view(self.out_channels, 1, self.kernel_size)
81
+
82
+ return F.conv1d(x, self.filters, stride=self.stride,
83
+ padding=self.padding, dilation=self.dilation,
84
+ bias=None, groups=1)
85
+
86
+
87
+
88
+ class Residual_block(nn.Module):
89
+ def __init__(self, nb_filts, first = False):
90
+ super(Residual_block, self).__init__()
91
+ self.first = first
92
+
93
+ if not self.first:
94
+ self.bn1 = nn.BatchNorm1d(num_features = nb_filts[0])
95
+
96
+ self.lrelu = nn.LeakyReLU(negative_slope=0.3)
97
+
98
+ self.conv1 = nn.Conv1d(in_channels = nb_filts[0],
99
+ out_channels = nb_filts[1],
100
+ kernel_size = 3,
101
+ padding = 1,
102
+ stride = 1)
103
+
104
+ self.bn2 = nn.BatchNorm1d(num_features = nb_filts[1])
105
+ self.conv2 = nn.Conv1d(in_channels = nb_filts[1],
106
+ out_channels = nb_filts[1],
107
+ padding = 1,
108
+ kernel_size = 3,
109
+ stride = 1)
110
+
111
+ if nb_filts[0] != nb_filts[1]:
112
+ self.downsample = True
113
+ self.conv_downsample = nn.Conv1d(in_channels = nb_filts[0],
114
+ out_channels = nb_filts[1],
115
+ padding = 0,
116
+ kernel_size = 1,
117
+ stride = 1)
118
+
119
+ else:
120
+ self.downsample = False
121
+ self.mp = nn.MaxPool1d(3)
122
+
123
+ def forward(self, x):
124
+ identity = x
125
+ if not self.first:
126
+ out = self.bn1(x)
127
+ out = self.lrelu(out)
128
+ else:
129
+ out = x
130
+
131
+ out = self.conv1(x)
132
+ out = self.bn2(out)
133
+ out = self.lrelu(out)
134
+ out = self.conv2(out)
135
+
136
+ if self.downsample:
137
+ identity = self.conv_downsample(identity)
138
+
139
+ out += identity
140
+ out = self.mp(out)
141
+ return out
142
+
143
+
144
+
145
+
146
+
147
+ class RawNet(nn.Module):
148
+ def __init__(self, d_args, device):
149
+ super(RawNet, self).__init__()
150
+
151
+
152
+ self.device=device
153
+
154
+ self.Sinc_conv=SincConv(device=self.device,
155
+ out_channels = d_args['filts'][0],
156
+ kernel_size = d_args['first_conv'],
157
+ in_channels = d_args['in_channels']
158
+ )
159
+
160
+ self.first_bn = nn.BatchNorm1d(num_features = d_args['filts'][0])
161
+ self.selu = nn.SELU(inplace=True)
162
+ self.block0 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][1], first = True))
163
+ self.block1 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][1]))
164
+ self.block2 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][2]))
165
+ d_args['filts'][2][0] = d_args['filts'][2][1]
166
+ self.block3 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][2]))
167
+ self.block4 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][2]))
168
+ self.block5 = nn.Sequential(Residual_block(nb_filts = d_args['filts'][2]))
169
+ self.avgpool = nn.AdaptiveAvgPool1d(1)
170
+
171
+ self.fc_attention0 = self._make_attention_fc(in_features = d_args['filts'][1][-1],
172
+ l_out_features = d_args['filts'][1][-1])
173
+ self.fc_attention1 = self._make_attention_fc(in_features = d_args['filts'][1][-1],
174
+ l_out_features = d_args['filts'][1][-1])
175
+ self.fc_attention2 = self._make_attention_fc(in_features = d_args['filts'][2][-1],
176
+ l_out_features = d_args['filts'][2][-1])
177
+ self.fc_attention3 = self._make_attention_fc(in_features = d_args['filts'][2][-1],
178
+ l_out_features = d_args['filts'][2][-1])
179
+ self.fc_attention4 = self._make_attention_fc(in_features = d_args['filts'][2][-1],
180
+ l_out_features = d_args['filts'][2][-1])
181
+ self.fc_attention5 = self._make_attention_fc(in_features = d_args['filts'][2][-1],
182
+ l_out_features = d_args['filts'][2][-1])
183
+
184
+ self.bn_before_gru = nn.BatchNorm1d(num_features = d_args['filts'][2][-1])
185
+ self.gru = nn.GRU(input_size = d_args['filts'][2][-1],
186
+ hidden_size = d_args['gru_node'],
187
+ num_layers = d_args['nb_gru_layer'],
188
+ batch_first = True)
189
+
190
+
191
+ self.fc1_gru = nn.Linear(in_features = d_args['gru_node'],
192
+ out_features = d_args['nb_fc_node'])
193
+
194
+ self.fc2_gru = nn.Linear(in_features = d_args['nb_fc_node'],
195
+ out_features = d_args['nb_classes'],bias=True)
196
+
197
+
198
+ self.sig = nn.Sigmoid()
199
+ self.logsoftmax = nn.LogSoftmax(dim=1)
200
+
201
+ def forward(self, x, y = None):
202
+
203
+
204
+ nb_samp = x.shape[0]
205
+ len_seq = x.shape[1]
206
+ x=x.view(nb_samp,1,len_seq)
207
+
208
+ x = self.Sinc_conv(x)
209
+ x = F.max_pool1d(torch.abs(x), 3)
210
+ x = self.first_bn(x)
211
+ x = self.selu(x)
212
+
213
+ x0 = self.block0(x)
214
+ y0 = self.avgpool(x0).view(x0.size(0), -1) # torch.Size([batch, filter])
215
+ y0 = self.fc_attention0(y0)
216
+ y0 = self.sig(y0).view(y0.size(0), y0.size(1), -1) # torch.Size([batch, filter, 1])
217
+ x = x0 * y0 + y0 # (batch, filter, time) x (batch, filter, 1)
218
+
219
+
220
+ x1 = self.block1(x)
221
+ y1 = self.avgpool(x1).view(x1.size(0), -1) # torch.Size([batch, filter])
222
+ y1 = self.fc_attention1(y1)
223
+ y1 = self.sig(y1).view(y1.size(0), y1.size(1), -1) # torch.Size([batch, filter, 1])
224
+ x = x1 * y1 + y1 # (batch, filter, time) x (batch, filter, 1)
225
+
226
+ x2 = self.block2(x)
227
+ y2 = self.avgpool(x2).view(x2.size(0), -1) # torch.Size([batch, filter])
228
+ y2 = self.fc_attention2(y2)
229
+ y2 = self.sig(y2).view(y2.size(0), y2.size(1), -1) # torch.Size([batch, filter, 1])
230
+ x = x2 * y2 + y2 # (batch, filter, time) x (batch, filter, 1)
231
+
232
+ x3 = self.block3(x)
233
+ y3 = self.avgpool(x3).view(x3.size(0), -1) # torch.Size([batch, filter])
234
+ y3 = self.fc_attention3(y3)
235
+ y3 = self.sig(y3).view(y3.size(0), y3.size(1), -1) # torch.Size([batch, filter, 1])
236
+ x = x3 * y3 + y3 # (batch, filter, time) x (batch, filter, 1)
237
+
238
+ x4 = self.block4(x)
239
+ y4 = self.avgpool(x4).view(x4.size(0), -1) # torch.Size([batch, filter])
240
+ y4 = self.fc_attention4(y4)
241
+ y4 = self.sig(y4).view(y4.size(0), y4.size(1), -1) # torch.Size([batch, filter, 1])
242
+ x = x4 * y4 + y4 # (batch, filter, time) x (batch, filter, 1)
243
+
244
+ x5 = self.block5(x)
245
+ y5 = self.avgpool(x5).view(x5.size(0), -1) # torch.Size([batch, filter])
246
+ y5 = self.fc_attention5(y5)
247
+ y5 = self.sig(y5).view(y5.size(0), y5.size(1), -1) # torch.Size([batch, filter, 1])
248
+ x = x5 * y5 + y5 # (batch, filter, time) x (batch, filter, 1)
249
+
250
+ x = self.bn_before_gru(x)
251
+ x = self.selu(x)
252
+ x = x.permute(0, 2, 1) #(batch, filt, time) >> (batch, time, filt)
253
+ self.gru.flatten_parameters()
254
+ x, _ = self.gru(x)
255
+ x = x[:,-1,:]
256
+ x = self.fc1_gru(x)
257
+ x = self.fc2_gru(x)
258
+ output=self.logsoftmax(x)
259
+
260
+ return output
261
+
262
+
263
+
264
+ def _make_attention_fc(self, in_features, l_out_features):
265
+
266
+ l_fc = []
267
+
268
+ l_fc.append(nn.Linear(in_features = in_features,
269
+ out_features = l_out_features))
270
+
271
+
272
+
273
+ return nn.Sequential(*l_fc)
274
+
275
+
276
+ def _make_layer(self, nb_blocks, nb_filts, first = False):
277
+ layers = []
278
+ #def __init__(self, nb_filts, first = False):
279
+ for i in range(nb_blocks):
280
+ first = first if i == 0 else False
281
+ layers.append(Residual_block(nb_filts = nb_filts,
282
+ first = first))
283
+ if i == 0: nb_filts[0] = nb_filts[1]
284
+
285
+ return nn.Sequential(*layers)
286
+
287
+ def summary(self, input_size, batch_size=-1, device="cuda", print_fn = None):
288
+ if print_fn is None: print_fn = print
289
+ model = self
290
+
291
+ def register_hook(module):
292
+ def hook(module, input, output):
293
+ class_name = str(module.__class__).split(".")[-1].split("'")[0]
294
+ module_idx = len(summary)
295
+
296
+ m_key = "%s-%i" % (class_name, module_idx + 1)
297
+ summary[m_key] = OrderedDict()
298
+ summary[m_key]["input_shape"] = list(input[0].size())
299
+ summary[m_key]["input_shape"][0] = batch_size
300
+ if isinstance(output, (list, tuple)):
301
+ summary[m_key]["output_shape"] = [
302
+ [-1] + list(o.size())[1:] for o in output
303
+ ]
304
+ else:
305
+ summary[m_key]["output_shape"] = list(output.size())
306
+ if len(summary[m_key]["output_shape"]) != 0:
307
+ summary[m_key]["output_shape"][0] = batch_size
308
+
309
+ params = 0
310
+ if hasattr(module, "weight") and hasattr(module.weight, "size"):
311
+ params += torch.prod(torch.LongTensor(list(module.weight.size())))
312
+ summary[m_key]["trainable"] = module.weight.requires_grad
313
+ if hasattr(module, "bias") and hasattr(module.bias, "size"):
314
+ params += torch.prod(torch.LongTensor(list(module.bias.size())))
315
+ summary[m_key]["nb_params"] = params
316
+
317
+ if (
318
+ not isinstance(module, nn.Sequential)
319
+ and not isinstance(module, nn.ModuleList)
320
+ and not (module == model)
321
+ ):
322
+ hooks.append(module.register_forward_hook(hook))
323
+
324
+ device = device.lower()
325
+ assert device in [
326
+ "cuda",
327
+ "cpu",
328
+ ], "Input device is not valid, please specify 'cuda' or 'cpu'"
329
+
330
+ if device == "cuda" and torch.cuda.is_available():
331
+ dtype = torch.cuda.FloatTensor
332
+ else:
333
+ dtype = torch.FloatTensor
334
+ if isinstance(input_size, tuple):
335
+ input_size = [input_size]
336
+ x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]
337
+ summary = OrderedDict()
338
+ hooks = []
339
+ model.apply(register_hook)
340
+ model(*x)
341
+ for h in hooks:
342
+ h.remove()
343
+
344
+ print_fn("----------------------------------------------------------------")
345
+ line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
346
+ print_fn(line_new)
347
+ print_fn("================================================================")
348
+ total_params = 0
349
+ total_output = 0
350
+ trainable_params = 0
351
+ for layer in summary:
352
+ # input_shape, output_shape, trainable, nb_params
353
+ line_new = "{:>20} {:>25} {:>15}".format(
354
+ layer,
355
+ str(summary[layer]["output_shape"]),
356
+ "{0:,}".format(summary[layer]["nb_params"]),
357
+ )
358
+ total_params += summary[layer]["nb_params"]
359
+ total_output += np.prod(summary[layer]["output_shape"])
360
+ if "trainable" in summary[layer]:
361
+ if summary[layer]["trainable"] == True:
362
+ trainable_params += summary[layer]["nb_params"]
363
+ print_fn(line_new)
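As a quick sanity check of the RawNet class as configured in pipeline.py, a sketch that runs a random waveform through an untrained instance just to confirm the expected (batch, 2) LogSoftmax output:

    # Sketch: instantiate RawNet with the d_args used elsewhere in this commit and run a dummy input.
    import torch
    from rawnet import RawNet

    d_args = {"nb_samp": 64600, "first_conv": 1024, "in_channels": 1,
              "filts": [20, [20, 20], [20, 128], [128, 128]], "blocks": [2, 4],
              "nb_fc_node": 1024, "gru_node": 1024, "nb_gru_layer": 3, "nb_classes": 2}

    net = RawNet(d_args, device="cpu")
    net.eval()
    dummy = torch.randn(1, 64600)   # ~4 seconds of 16 kHz audio
    out = net(dummy)
    print(out.shape)                # torch.Size([1, 2]) -- per-class log-probabilities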
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ tensorflow
+ tensorflow-addons
+ facenet_pytorch
+ numpy
+ opencv-python
+ opencv-python-headless
+ mtcnn
+ moviepy
videos/celeb_synthesis.mp4 ADDED
Binary file (209 kB)

videos/real-1.mp4 ADDED
Binary file (631 kB)