Spaces commit: remove mtcnn detector

Files changed:
- __pycache__/pipeline.cpython-39.pyc (+0 -0)
- __pycache__/rawnet.cpython-39.pyc (+0 -0)
- pipeline.py (+11 -37)
__pycache__/pipeline.cpython-39.pyc CHANGED
Binary files a/__pycache__/pipeline.cpython-39.pyc and b/__pycache__/pipeline.cpython-39.pyc differ

__pycache__/rawnet.cpython-39.pyc CHANGED
Binary files a/__pycache__/rawnet.cpython-39.pyc and b/__pycache__/rawnet.cpython-39.pyc differ
pipeline.py CHANGED

@@ -16,7 +16,6 @@ zip_ref.close()
 
 
 # Load models.
-mtcnn = MTCNN(margin=14, keep_all=True, factor=0.7, device='cpu')
 model = tf.keras.models.load_model("efficientnet-b0/")
 
 
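With the MTCNN detector removed, only the EfficientNet classifier is loaded at import time. A minimal sketch of loading and invoking a SavedModel directory such as "efficientnet-b0/"; the (1, 224, 224, 3) input shape is an assumption inferred from the cv2.resize(..., (224, 224)) calls later in this diff, not something the diff states:

import numpy as np
import tensorflow as tf

# Load the classifier from its SavedModel directory, as in the diff.
model = tf.keras.models.load_model("efficientnet-b0/")

# Hypothetical input: one 224x224 RGB frame, batched along axis 0.
frame = np.zeros((1, 224, 224, 3), dtype=np.float32)
probs = model.predict(frame)  # shape (1, n_classes)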
@@ -24,7 +23,7 @@ model = tf.keras.models.load_model("efficientnet-b0/")
 class DetectionPipeline:
     """Pipeline class for detecting faces in the frames of a video file."""
 
-    def __init__(self, detector, n_frames=None, batch_size=60, resize=None, input_modality = 'video'):
+    def __init__(self, n_frames=None, batch_size=60, resize=None, input_modality = 'video'):
         """Constructor for DetectionPipeline class.
 
         Keyword Arguments:
@@ -36,7 +35,6 @@ class DetectionPipeline:
             detection. A value less than 1 results in downsampling and a value greater than
             1 result in upsampling. (default: {None})
         """
-        self.detector = detector
         self.n_frames = n_frames
         self.batch_size = batch_size
         self.resize = resize
@@ -79,25 +77,9 @@
 
                 # When batch is full, detect faces and reset frame list
                 if len(frames) % self.batch_size == 0 or j == sample[-1]:
-
-                    boxes, _ = self.detector.detect(frames)
-
-                    for i in range(len(frames)):
-                        if boxes[i] is None:
-                            faces.append(face2) #append previous face frame if no face is detected
-                            continue
-
-                        box = boxes[i][0].astype(int)
-                        frame = frames[i]
-                        face = frame[box[1]:box[3], box[0]:box[2]]
-
-                        if not face.any():
-                            faces.append(face2) #append previous face frame if no face is detected
-                            continue
-
-                        face2 = cv2.resize(face, (224, 224))
-                        faces.append(face2)
-                        frames = []
+                    face2 = cv2.resize(frame, (224, 224))
+                    faces.append(face2)
+
         v_cap.release()
         return faces
 
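After this hunk the video branch performs no face detection at all: each sampled frame is resized whole and appended. With batch_size=1, as in the pipeline instantiations further down, the modulo condition holds on every sampled frame, so frames are appended one by one. A self-contained sketch of the resulting behaviour; the capture loop around it is reconstructed from context, not quoted from the diff:

import cv2

def sample_resized_frames(path, n_frames=5, size=(224, 224)):
    # Sketch: take up to n_frames evenly spaced frames and resize each
    # whole frame to `size`; no detector and no cropping, as in the new code.
    v_cap = cv2.VideoCapture(path)
    v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
    sample = set(range(0, v_len, max(1, v_len // max(1, n_frames))))
    faces = []
    for j in range(v_len):
        if not v_cap.grab():
            break
        if j in sample:
            ok, frame = v_cap.retrieve()  # decode only sampled frames
            if ok:
                faces.append(cv2.resize(frame, size))
    v_cap.release()
    return faces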
@@ -106,21 +88,13 @@
             #Perform inference for image modality.
             print('Reading image')
             # print(f"Image path is: {filename}")
-            # image = cv2.imread(filename)
             image = cv2.cvtColor(filename, cv2.COLOR_BGR2RGB)
-
-
-            if boxes is None:
-                print('No faces found')
-
-            box = boxes[0].astype(int)
-            face = image[box[1]:box[3], box[0]:box[2]]
-            face = cv2.resize(face, (224, 224))
+            image = cv2.resize(image, (224, 224))
 
-            if not face.any():
-                print("No faces found...")
+            # if not face.any():
+            #     print("No faces found...")
 
-            return
+            return image
 
         elif self.input_modality == 'audio':
             print("INput modality is audio.")
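The image branch now returns the whole image, colour-converted and resized, instead of a face crop; note that `filename` is already a decoded ndarray here, since cv2.cvtColor is applied to it directly. The equivalent standalone transform, as a sketch:

import cv2
import numpy as np

def preprocess_image(frame_bgr: np.ndarray) -> np.ndarray:
    # Mirrors the post-commit image branch: BGR -> RGB, then a
    # whole-image resize to the classifier's 224x224 input.
    image = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    return cv2.resize(image, (224, 224))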
@@ -134,8 +108,8 @@
         else:
             raise ValueError("Invalid input modality. Must be either 'video' or image")
 
-detection_video_pipeline = DetectionPipeline(detector=mtcnn, n_frames=5, batch_size=1, input_modality='video')
-detection_image_pipeline = DetectionPipeline(detector=mtcnn, batch_size = 1, input_modality = 'image')
+detection_video_pipeline = DetectionPipeline(n_frames=5, batch_size=1, input_modality='video')
+detection_image_pipeline = DetectionPipeline(batch_size = 1, input_modality = 'image')
 
 def deepfakes_video_predict(input_video):
 
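Both module-level pipelines are now constructed without a detector argument. An illustrative usage sketch; it assumes DetectionPipeline instances are callable with a video path or a decoded image array and dispatch on input_modality as the branches above indicate, and the media paths are hypothetical:

import cv2

# Hypothetical inputs; the callable dispatch on input_modality is an
# assumption based on the branches shown above, not stated by the diff.
frames = detection_video_pipeline("example.mp4")   # list of 224x224 frames
frame_bgr = cv2.imread("example.jpg")              # decoded BGR image
image = detection_image_pipeline(frame_bgr)        # single 224x224 RGB array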
@@ -206,8 +180,8 @@ def load_audio_model():
     return model
 
 audio_label_map = {
-    0: "
-    1: "
+    0: "Real audio",
+    1: "Fake audio"
 }
 
 def deepfakes_audio_predict(input_audio):
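The label map now carries display strings for the two audio classes. A sketch of the usual argmax-style decoding; the body of deepfakes_audio_predict is not part of this diff, so treating its output as class scores is an assumption:

import numpy as np

audio_label_map = {0: "Real audio", 1: "Fake audio"}

def decode_audio_prediction(probs: np.ndarray) -> str:
    # Map the highest-scoring class index to its display string.
    return audio_label_map[int(np.argmax(probs))]

print(decode_audio_prediction(np.array([0.9, 0.1])))  # -> "Real audio"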