Spaces: Runtime error
remove mtcnn detector
- __pycache__/pipeline.cpython-39.pyc +0 -0
- __pycache__/rawnet.cpython-39.pyc +0 -0
- pipeline.py +11 -37
__pycache__/pipeline.cpython-39.pyc CHANGED
Binary files a/__pycache__/pipeline.cpython-39.pyc and b/__pycache__/pipeline.cpython-39.pyc differ

__pycache__/rawnet.cpython-39.pyc CHANGED
Binary files a/__pycache__/rawnet.cpython-39.pyc and b/__pycache__/rawnet.cpython-39.pyc differ
pipeline.py CHANGED

@@ -16,7 +16,6 @@ zip_ref.close()
 
 
 # Load models.
-mtcnn = MTCNN(margin=14, keep_all=True, factor=0.7, device='cpu')
 model = tf.keras.models.load_model("efficientnet-b0/")
 
 
@@ -24,7 +23,7 @@ model = tf.keras.models.load_model("efficientnet-b0/")
 class DetectionPipeline:
     """Pipeline class for detecting faces in the frames of a video file."""
 
-    def __init__(self,
+    def __init__(self, n_frames=None, batch_size=60, resize=None, input_modality = 'video'):
         """Constructor for DetectionPipeline class.
 
         Keyword Arguments:
@@ -36,7 +35,6 @@ class DetectionPipeline:
             detection. A value less than 1 results in downsampling and a value greater than
             1 result in upsampling. (default: {None})
         """
-        self.detector = detector
         self.n_frames = n_frames
         self.batch_size = batch_size
         self.resize = resize
@@ -79,25 +77,9 @@ class DetectionPipeline:
 
             # When batch is full, detect faces and reset frame list
             if len(frames) % self.batch_size == 0 or j == sample[-1]:
-
-                boxes, _ = self.detector.detect(frames)
-
-                for i in range(len(frames)):
-                    if boxes[i] is None:
-                        faces.append(face2) #append previous face frame if no face is detected
-                        continue
-
-                    box = boxes[i][0].astype(int)
-                    frame = frames[i]
-                    face = frame[box[1]:box[3], box[0]:box[2]]
-
-                    if not face.any():
-                        faces.append(face2) #append previous face frame if no face is detected
-                        continue
-
-                    face2 = cv2.resize(face, (224, 224))
-                    faces.append(face2)
-                frames = []
+                face2 = cv2.resize(frame, (224, 224))
+                faces.append(face2)
+
         v_cap.release()
         return faces
 
@@ -106,21 +88,13 @@ class DetectionPipeline:
             #Perform inference for image modality.
             print('Reading image')
             # print(f"Image path is: {filename}")
-            # image = cv2.imread(filename)
             image = cv2.cvtColor(filename, cv2.COLOR_BGR2RGB)
-
-
-            if boxes is None:
-                print('No faces found')
-
-            box = boxes[0].astype(int)
-            face = image[box[1]:box[3], box[0]:box[2]]
-            face = cv2.resize(face, (224, 224))
+            image = cv2.resize(image, (224, 224))
 
-            if not face.any():
-
+            # if not face.any():
+            #     print("No faces found...")
 
-                return
+            return image
 
         elif self.input_modality == 'audio':
             print("INput modality is audio.")
@@ -134,8 +108,8 @@ class DetectionPipeline:
         else:
             raise ValueError("Invalid input modality. Must be either 'video' or image")
 
-detection_video_pipeline = DetectionPipeline(
-detection_image_pipeline = DetectionPipeline(
+detection_video_pipeline = DetectionPipeline(n_frames=5, batch_size=1, input_modality='video')
+detection_image_pipeline = DetectionPipeline(batch_size = 1, input_modality = 'image')
 
 def deepfakes_video_predict(input_video):
 
@@ -206,8 +180,8 @@ def load_audio_model():
    return model
 
 audio_label_map = {
-    0: "
-    1: "
+    0: "Real audio",
+    1: "Fake audio"
 }
 
 def deepfakes_audio_predict(input_audio):
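With this commit the pipeline no longer detects or crops faces: the video path resizes each sampled frame to 224x224 and appends it directly, and the image path resizes the whole image before returning it. Below is a minimal sketch of the resulting video behaviour, for illustration only; it assumes the unchanged surrounding code samples frames at evenly spaced indices, and the helper name sample_and_resize_frames is hypothetical, not part of this repo.

import cv2
import numpy as np

def sample_and_resize_frames(video_path, n_frames=5, size=(224, 224)):
    # Hypothetical sketch: pick n_frames evenly spaced indices, then resize
    # each whole frame to the model input size, with no MTCNN face cropping.
    v_cap = cv2.VideoCapture(video_path)
    total = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
    sample = np.linspace(0, total - 1, n_frames).astype(int)

    faces = []
    for j in range(total):
        v_cap.grab()                      # advance the stream without decoding
        if j in sample:
            ok, frame = v_cap.retrieve()  # decode only the sampled frames
            if not ok:
                continue
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            faces.append(cv2.resize(frame, size))  # replaces the old face crop
    v_cap.release()
    return faces

One side effect worth noting: if the EfficientNet checkpoint was trained on MTCNN face crops, feeding it full resized frames changes its input distribution, so predictions may shift even once the Space's runtime error is resolved.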