from typing import Mapping

import mediapipe as mp
import numpy
from PIL import Image

mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_detection = mp.solutions.face_detection  # Only for counting faces.
mp_face_mesh = mp.solutions.face_mesh
mp_face_connections = mp.solutions.face_mesh_connections.FACEMESH_TESSELATION
mp_hand_connections = mp.solutions.hands_connections.HAND_CONNECTIONS
mp_body_connections = mp.solutions.pose_connections.POSE_CONNECTIONS

DrawingSpec = mp.solutions.drawing_styles.DrawingSpec
PoseLandmark = mp.solutions.drawing_styles.PoseLandmark

f_thick = 2
f_rad = 1
right_iris_draw = DrawingSpec(color=(10, 200, 250), thickness=f_thick, circle_radius=f_rad)
right_eye_draw = DrawingSpec(color=(10, 200, 180), thickness=f_thick, circle_radius=f_rad)
right_eyebrow_draw = DrawingSpec(color=(10, 220, 180), thickness=f_thick, circle_radius=f_rad)
left_iris_draw = DrawingSpec(color=(250, 200, 10), thickness=f_thick, circle_radius=f_rad)
left_eye_draw = DrawingSpec(color=(180, 200, 10), thickness=f_thick, circle_radius=f_rad)
left_eyebrow_draw = DrawingSpec(color=(180, 220, 10), thickness=f_thick, circle_radius=f_rad)
mouth_draw = DrawingSpec(color=(10, 180, 10), thickness=f_thick, circle_radius=f_rad)
head_draw = DrawingSpec(color=(10, 200, 10), thickness=f_thick, circle_radius=f_rad)

# mp_face_mesh.FACEMESH_CONTOURS has all the items we care about.
face_connection_spec = {}
for edge in mp_face_mesh.FACEMESH_FACE_OVAL:
    face_connection_spec[edge] = head_draw
for edge in mp_face_mesh.FACEMESH_LEFT_EYE:
    face_connection_spec[edge] = left_eye_draw
for edge in mp_face_mesh.FACEMESH_LEFT_EYEBROW:
    face_connection_spec[edge] = left_eyebrow_draw
# for edge in mp_face_mesh.FACEMESH_LEFT_IRIS:
#     face_connection_spec[edge] = left_iris_draw
for edge in mp_face_mesh.FACEMESH_RIGHT_EYE:
    face_connection_spec[edge] = right_eye_draw
for edge in mp_face_mesh.FACEMESH_RIGHT_EYEBROW:
    face_connection_spec[edge] = right_eyebrow_draw
# for edge in mp_face_mesh.FACEMESH_RIGHT_IRIS:
#     face_connection_spec[edge] = right_iris_draw
for edge in mp_face_mesh.FACEMESH_LIPS:
    face_connection_spec[edge] = mouth_draw
iris_landmark_spec = {468: right_iris_draw, 473: left_iris_draw}

def draw_pupils(image, landmark_list, drawing_spec, halfwidth: int = 2):
    """We have a custom function to draw the pupils because mp_drawing.draw_landmarks requires a drawing spec for
    every landmark. Until our PR is merged into mediapipe, we need this separate method."""
    if len(image.shape) != 3:
        raise ValueError("Input image must be H,W,C.")
    image_rows, image_cols, image_channels = image.shape
    if image_channels != 3:  # BGR channels
        raise ValueError('Input image must contain three channel bgr data.')
    for idx, landmark in enumerate(landmark_list.landmark):
        if (
                (landmark.HasField('visibility') and landmark.visibility < 0.9) or
                (landmark.HasField('presence') and landmark.presence < 0.5)
        ):
            continue
        if landmark.x >= 1.0 or landmark.x < 0 or landmark.y >= 1.0 or landmark.y < 0:
            continue
        image_x = int(image_cols * landmark.x)
        image_y = int(image_rows * landmark.y)
        draw_color = None
        if isinstance(drawing_spec, Mapping):
            if drawing_spec.get(idx) is None:
                continue
            else:
                draw_color = drawing_spec[idx].color
        elif isinstance(drawing_spec, DrawingSpec):
            draw_color = drawing_spec.color
        # Paint a small square centered on the landmark.
        image[image_y - halfwidth:image_y + halfwidth, image_x - halfwidth:image_x + halfwidth, :] = draw_color
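
# Illustrative usage (a sketch, not executed here): `drawing_spec` may be a single
# DrawingSpec applied to every landmark, or a {landmark_index: DrawingSpec} mapping
# like iris_landmark_spec above. Assuming `canvas` is a BGR numpy image and
# `face_landmarks` came from FaceMesh.process():
#
#     draw_pupils(canvas, face_landmarks, iris_landmark_spec, halfwidth=2)
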
def reverse_channels(image):
    """Given a numpy array in RGB form, convert to BGR. Will also convert from BGR to RGB."""
    # im[:, :, ::-1] is a neat hack to convert BGR to RGB by reversing the indexing order.
    # im[:, :, [2, 1, 0]] would also work but makes a copy of the data.
    return image[:, :, ::-1]
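
# Quick sanity check (illustrative only): applying the function twice is the
# identity, and the ::-1 slice returns a view of the original data, not a copy.
#
#     px = numpy.array([[[255, 0, 0]]], dtype=numpy.uint8)     # one red RGB pixel
#     assert tuple(reverse_channels(px)[0, 0]) == (0, 0, 255)  # now BGR
#     assert tuple(reverse_channels(reverse_channels(px))[0, 0]) == (255, 0, 0)
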
def generate_annotation(
    input_image: Image.Image,
    max_faces: int,
    min_face_size_pixels: int = 0,
    return_annotation_data: bool = False
):
    """
    Find up to 'max_faces' inside the provided input image.
    If min_face_size_pixels is provided and nonzero, it is used to filter out faces that occupy fewer than this many
    pixels in the image.
    If return_annotation_data is True (default: False), then in addition to the 'detected face' image, three more
    values are returned: an annotated copy of the input image, the number of faces detected before filtering, and
    the number of faces remaining after small faces are filtered out.
    :return:
    If 'return_annotation_data==True', returns (numpy array, numpy array, int, int).
    If 'return_annotation_data==False' (default), returns a numpy array.
    """
    with mp_face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=max_faces,
        refine_landmarks=True,
        min_detection_confidence=0.5,
    ) as facemesh:
        img_rgb = numpy.asarray(input_image)
        # multi_face_landmarks is None (not an empty list) when no faces are detected.
        results = facemesh.process(img_rgb).multi_face_landmarks or []
        faces_found_before_filtering = len(results)

        # Filter faces that are too small.
        filtered_landmarks = []
        for lm in results:
            landmarks = lm.landmark
            face_rect = [
                landmarks[0].x,
                landmarks[0].y,
                landmarks[0].x,
                landmarks[0].y,
            ]  # Left, up, right, down.
            for i in range(len(landmarks)):
                face_rect[0] = min(face_rect[0], landmarks[i].x)
                face_rect[1] = min(face_rect[1], landmarks[i].y)
                face_rect[2] = max(face_rect[2], landmarks[i].x)
                face_rect[3] = max(face_rect[3], landmarks[i].y)
            if min_face_size_pixels > 0:
                face_width = abs(face_rect[2] - face_rect[0])
                face_height = abs(face_rect[3] - face_rect[1])
                face_width_pixels = face_width * input_image.size[0]
                face_height_pixels = face_height * input_image.size[1]
                face_size = min(face_width_pixels, face_height_pixels)
                if face_size >= min_face_size_pixels:
                    filtered_landmarks.append(lm)
            else:
                filtered_landmarks.append(lm)
        faces_remaining_after_filtering = len(filtered_landmarks)

        # Annotations are drawn in BGR for some reason, but we don't need to flip a zero-filled image at the start.
        empty = numpy.zeros_like(img_rgb)

        # Draw detected faces:
        for face_landmarks in filtered_landmarks:
            mp_drawing.draw_landmarks(
                empty,
                face_landmarks,
                connections=face_connection_spec.keys(),
                landmark_drawing_spec=None,
                connection_drawing_spec=face_connection_spec
            )
            draw_pupils(empty, face_landmarks, iris_landmark_spec, 2)

        # Flip BGR back to RGB.
        empty = reverse_channels(empty)

        # We might have to generate a composite.
        if return_annotation_data:
            # Note that we're copying the input image AND flipping the channels so we can draw on top of it.
            annotated = reverse_channels(numpy.asarray(input_image)).copy()
            for face_landmarks in filtered_landmarks:
                mp_drawing.draw_landmarks(
                    annotated,
                    face_landmarks,
                    connections=face_connection_spec.keys(),
                    landmark_drawing_spec=None,
                    connection_drawing_spec=face_connection_spec
                )
                draw_pupils(annotated, face_landmarks, iris_landmark_spec, 2)
            annotated = reverse_channels(annotated)

    if not return_annotation_data:
        return empty
    else:
        return empty, annotated, faces_found_before_filtering, faces_remaining_after_filtering
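

if __name__ == "__main__":
    # Minimal usage sketch. "example.jpg" and "example_annotation.png" are
    # placeholder paths, and max_faces/min_face_size_pixels are arbitrary values.
    source = Image.open("example.jpg").convert("RGB")
    annotation = generate_annotation(source, max_faces=5, min_face_size_pixels=64)
    Image.fromarray(annotation).save("example_annotation.png")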