Spaces:
				
			
			
	
			
			
		No application file
		
	
	
	
			
			
	
	
	
	
		
		
		No application file
		
	| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Tue Sep 8 13:31:25 2020 | |
| @author: infguo | |
| """ | |
| import os | |
| from typing import Dict | |
| import argparse | |
| import json | |
| import traceback | |
| import hashlib | |
| import cv2 | |
| from moviepy.editor import VideoFileClip | |
| # TODO | |
| # 该部分与源代码相比,修改了insight_face的输出接口,将性别分数透传出来,用于后续更精准的决策 | |
| def inference(frame, app, max_face=10): | |
| # Start to perform face recognition | |
| try: # Handle exception | |
| faces = app.get(frame, max_num=max_face) | |
| except Exception as e: | |
| print("is discarded due to exception {}!".format(e)) | |
| return | |
| if ( | |
| len(faces) == 0 | |
| ): # If the landmarks cannot be detected, the img will be discarded | |
| return | |
| return faces | |
| def predict(app, video_path, video_map, sample_fps): | |
| from insightface.app import FaceAnalysis | |
| video_name = ".".join(video_path.split("/")[-1].split(".")[:-1]) | |
| # video_hash_code = (os.popen('md5sum {}'.format(video_path))).readlines()[0].split(' ')[0] | |
| with open(video_path, "rb") as fd: | |
| data = fd.read() | |
| video_hash_code = hashlib.md5(data).hexdigest() | |
| assert video_hash_code == video_map["video_file_hash_code"] | |
| # Capture video | |
| video = VideoFileClip(video_path) | |
| video = video.crop(*video_map["content_box"]) | |
| fps = video.fps | |
| duration = video.duration | |
| total_frames = int(duration * fps) | |
| width, height = video.size | |
| print("fps, frame_count, width, height:", fps, total_frames, width, height) | |
| video_map["detect_fps"] = sample_fps | |
| video_map["face_detections"] = [] | |
| cnt_frame, step = 0, 0 | |
| fps = video_map["sample_fps"] | |
| for frame in video.iter_frames(fps=fps): | |
| if cnt_frame >= step: | |
| step += fps / sample_fps | |
| frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) | |
| faces = inference(frame, app, max_face=0) | |
| if faces and len(faces) > 0: | |
| for f in faces: | |
| f["bbox"] = f["bbox"].tolist() | |
| f["kps"] = f["kps"].tolist() | |
| f["embedding"] = f["embedding"].tolist() | |
| f["det_score"] = str(f["det_score"]) | |
| f["gender"] = str(f["gender"]) | |
| f["age"] = str(f["age"]) | |
| else: | |
| faces = None | |
| video_map["face_detections"].append( | |
| {"frame_idx": cnt_frame, "faces": faces} | |
| ) | |
| cnt_frame += 1 | |
| return video_map | |
| class InsightfacePredictor(object): | |
| def __init__( | |
| self, | |
| sample_fps=10, | |
| ) -> None: | |
| # Load models | |
| self.sample_fps = sample_fps | |
| self.app = FaceAnalysis( | |
| allowed_modules=["detection", "genderage", "recognition"], | |
| providers=["CUDAExecutionProvider"], | |
| provider_options=[{"device_id": "0"}], | |
| ) | |
| self.app.prepare(ctx_id=0, det_thresh=0.3, det_size=(640, 640)) | |
| def __call__(self, video_path, video_map) -> Dict: | |
| video_info = predict(video_path, video_map, sample_fps=self.sample_fps) | |
| return video_info | |
