Spaces:

kevinwang676
/

MuseV

No application file

App Files Files Community

MuseV / mmcm /vision /human /insightface_predictor.py

kevinwang676

Upload folder using huggingface_hub

bb46cbe verified over 1 year ago

raw

history blame contribute delete

3.11 kB

	# -- coding: utf-8 --
	"""
	Created on Tue Sep 8 13:31:25 2020

	@author: infguo
	"""

	import os
	from typing import Dict
	import argparse
	import json
	import traceback
	import hashlib

	import cv2
	from moviepy.editor import VideoFileClip

	# TODO
	# 该部分与源代码相比，修改了insight_face的输出接口，将性别分数透传出来，用于后续更精准的决策


	def inference(frame, app, max_face=10):
	# Start to perform face recognition
	try: # Handle exception
	faces = app.get(frame, max_num=max_face)
	except Exception as e:
	print("is discarded due to exception {}!".format(e))
	return

	if (
	len(faces) == 0
	): # If the landmarks cannot be detected, the img will be discarded
	return
	return faces


	def predict(app, video_path, video_map, sample_fps):
	from insightface.app import FaceAnalysis

	video_name = ".".join(video_path.split("/")[-1].split(".")[:-1])
	# video_hash_code = (os.popen('md5sum {}'.format(video_path))).readlines()[0].split(' ')[0]
	with open(video_path, "rb") as fd:
	data = fd.read()
	video_hash_code = hashlib.md5(data).hexdigest()

	assert video_hash_code == video_map["video_file_hash_code"]

	# Capture video
	video = VideoFileClip(video_path)
	video = video.crop(*video_map["content_box"])
	fps = video.fps
	duration = video.duration
	total_frames = int(duration * fps)
	width, height = video.size
	print("fps, frame_count, width, height:", fps, total_frames, width, height)

	video_map["detect_fps"] = sample_fps
	video_map["face_detections"] = []

	cnt_frame, step = 0, 0

	fps = video_map["sample_fps"]
	for frame in video.iter_frames(fps=fps):
	if cnt_frame >= step:
	step += fps / sample_fps
	frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
	faces = inference(frame, app, max_face=0)
	if faces and len(faces) > 0:
	for f in faces:
	f["bbox"] = f["bbox"].tolist()
	f["kps"] = f["kps"].tolist()
	f["embedding"] = f["embedding"].tolist()
	f["det_score"] = str(f["det_score"])
	f["gender"] = str(f["gender"])
	f["age"] = str(f["age"])
	else:
	faces = None
	video_map["face_detections"].append(
	{"frame_idx": cnt_frame, "faces": faces}
	)

	cnt_frame += 1
	return video_map


	class InsightfacePredictor(object):
	def __init__(
	self,
	sample_fps=10,
	) -> None:
	# Load models
	self.sample_fps = sample_fps
	self.app = FaceAnalysis(
	allowed_modules=["detection", "genderage", "recognition"],
	providers=["CUDAExecutionProvider"],
	provider_options=[{"device_id": "0"}],
	)
	self.app.prepare(ctx_id=0, det_thresh=0.3, det_size=(640, 640))

	def __call__(self, video_path, video_map) -> Dict:
	video_info = predict(video_path, video_map, sample_fps=self.sample_fps)
	return video_info