Spaces commit: remove mtcnn detector

Files changed:
- __pycache__/pipeline.cpython-39.pyc (+0 -0)
- __pycache__/rawnet.cpython-39.pyc (+0 -0)
- pipeline.py (+11 -37)
__pycache__/pipeline.cpython-39.pyc CHANGED
Binary files a/__pycache__/pipeline.cpython-39.pyc and b/__pycache__/pipeline.cpython-39.pyc differ

__pycache__/rawnet.cpython-39.pyc CHANGED
Binary files a/__pycache__/rawnet.cpython-39.pyc and b/__pycache__/rawnet.cpython-39.pyc differ
pipeline.py CHANGED

@@ -16,7 +16,6 @@ zip_ref.close()
 
 
 # Load models.
-mtcnn = MTCNN(margin=14, keep_all=True, factor=0.7, device='cpu')
 model = tf.keras.models.load_model("efficientnet-b0/")
 
 
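With the MTCNN detector removed, only the EfficientNet classifier is loaded at import time. A minimal sketch of loading and invoking a SavedModel directory such as "efficientnet-b0/"; the (1, 224, 224, 3) input shape is an assumption inferred from the cv2.resize(..., (224, 224)) calls later in this diff, not something the diff states:

import numpy as np
import tensorflow as tf

# Load the classifier from its SavedModel directory, as in the diff.
model = tf.keras.models.load_model("efficientnet-b0/")

# Hypothetical input: one 224x224 RGB frame, batched along axis 0.
frame = np.zeros((1, 224, 224, 3), dtype=np.float32)
probs = model.predict(frame)  # shape (1, n_classes)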
@@ -24,7 +23,7 @@ model = tf.keras.models.load_model("efficientnet-b0/")
 class DetectionPipeline:
     """Pipeline class for detecting faces in the frames of a video file."""
 
-    def __init__(self, detector, n_frames=None, batch_size=60, resize=None, input_modality = 'video'):
+    def __init__(self, n_frames=None, batch_size=60, resize=None, input_modality = 'video'):
         """Constructor for DetectionPipeline class.
 
         Keyword Arguments:
@@ -36,7 +35,6 @@ class DetectionPipeline:
             detection. A value less than 1 results in downsampling and a value greater than
             1 result in upsampling. (default: {None})
         """
-        self.detector = detector
         self.n_frames = n_frames
         self.batch_size = batch_size
         self.resize = resize
@@ -79,25 +77,9 @@
 
                 # When batch is full, detect faces and reset frame list
                 if len(frames) % self.batch_size == 0 or j == sample[-1]:
-
-                    boxes, _ = self.detector.detect(frames)
-
-                    for i in range(len(frames)):
-                        if boxes[i] is None:
-                            faces.append(face2) #append previous face frame if no face is detected
-                            continue
-
-                        box = boxes[i][0].astype(int)
-                        frame = frames[i]
-                        face = frame[box[1]:box[3], box[0]:box[2]]
-
-                        if not face.any():
-                            faces.append(face2) #append previous face frame if no face is detected
-                            continue
-
-                        face2 = cv2.resize(face, (224, 224))
-                        faces.append(face2)
-                        frames = []
+                    face2 = cv2.resize(frame, (224, 224))
+                    faces.append(face2)
+
         v_cap.release()
         return faces
 
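After this hunk the video branch performs no face detection at all: each sampled frame is resized whole and appended. With batch_size=1, as in the pipeline instantiations further down, the modulo condition holds on every sampled frame, so frames are appended one by one. A self-contained sketch of the resulting behaviour; the capture loop around it is reconstructed from context, not quoted from the diff:

import cv2

def sample_resized_frames(path, n_frames=5, size=(224, 224)):
    # Sketch: take up to n_frames evenly spaced frames and resize each
    # whole frame to `size`; no detector and no cropping, as in the new code.
    v_cap = cv2.VideoCapture(path)
    v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
    sample = set(range(0, v_len, max(1, v_len // max(1, n_frames))))
    faces = []
    for j in range(v_len):
        if not v_cap.grab():
            break
        if j in sample:
            ok, frame = v_cap.retrieve()  # decode only sampled frames
            if ok:
                faces.append(cv2.resize(frame, size))
    v_cap.release()
    return faces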
@@ -106,21 +88,13 @@
             #Perform inference for image modality.
             print('Reading image')
             # print(f"Image path is: {filename}")
-            # image = cv2.imread(filename)
             image = cv2.cvtColor(filename, cv2.COLOR_BGR2RGB)
-
-
-            if boxes is None:
-                print('No faces found')
-
-            box = boxes[0].astype(int)
-            face = image[box[1]:box[3], box[0]:box[2]]
-            face = cv2.resize(face, (224, 224))
+            image = cv2.resize(image, (224, 224))
 
-            if not face.any():
-                print("No faces found...")
+            # if not face.any():
+            #     print("No faces found...")
 
-            return
+            return image
 
         elif self.input_modality == 'audio':
             print("INput modality is audio.")
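The image branch now returns the whole image, colour-converted and resized, instead of a face crop; note that `filename` is already a decoded ndarray here, since cv2.cvtColor is applied to it directly. The equivalent standalone transform, as a sketch:

import cv2
import numpy as np

def preprocess_image(frame_bgr: np.ndarray) -> np.ndarray:
    # Mirrors the post-commit image branch: BGR -> RGB, then a
    # whole-image resize to the classifier's 224x224 input.
    image = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    return cv2.resize(image, (224, 224))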
@@ -134,8 +108,8 @@
         else:
             raise ValueError("Invalid input modality. Must be either 'video' or image")
 
-detection_video_pipeline = DetectionPipeline(detector=mtcnn, n_frames=5, batch_size=1, input_modality='video')
-detection_image_pipeline = DetectionPipeline(detector=mtcnn, batch_size = 1, input_modality = 'image')
+detection_video_pipeline = DetectionPipeline(n_frames=5, batch_size=1, input_modality='video')
+detection_image_pipeline = DetectionPipeline(batch_size = 1, input_modality = 'image')
 
 def deepfakes_video_predict(input_video):
 
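Both module-level pipelines are now constructed without a detector argument. An illustrative usage sketch; it assumes DetectionPipeline instances are callable with a video path or a decoded image array and dispatch on input_modality as the branches above indicate, and the media paths are hypothetical:

import cv2

# Hypothetical inputs; the callable dispatch on input_modality is an
# assumption based on the branches shown above, not stated by the diff.
frames = detection_video_pipeline("example.mp4")   # list of 224x224 frames
frame_bgr = cv2.imread("example.jpg")              # decoded BGR image
image = detection_image_pipeline(frame_bgr)        # single 224x224 RGB array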
@@ -206,8 +180,8 @@ def load_audio_model():
     return model
 
 audio_label_map = {
-    0: "
-    1: "
+    0: "Real audio",
+    1: "Fake audio"
 }
 
 def deepfakes_audio_predict(input_audio):
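The label map now carries display strings for the two audio classes. A sketch of the usual argmax-style decoding; the body of deepfakes_audio_predict is not part of this diff, so treating its output as class scores is an assumption:

import numpy as np

audio_label_map = {0: "Real audio", 1: "Fake audio"}

def decode_audio_prediction(probs: np.ndarray) -> str:
    # Map the highest-scoring class index to its display string.
    return audio_label_map[int(np.argmax(probs))]

print(decode_audio_prediction(np.array([0.9, 0.1])))  # -> "Real audio"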