1
+ from typing import Mapping
2
+
3
+ import mediapipe as mp
4
+ import numpy
5
+
6
+
7
# Short aliases for the mediapipe solution modules this file relies on.
mp_face_mesh = mp.solutions.face_mesh
mp_face_detection = mp.solutions.face_detection  # Only for counting faces.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Landmark connection sets (collections of landmark-index pairs) for face, hands and body.
mp_face_connections = mp.solutions.face_mesh_connections.FACEMESH_TESSELATION
mp_hand_connections = mp.solutions.hands_connections.HAND_CONNECTIONS
mp_body_connections = mp.solutions.pose_connections.POSE_CONNECTIONS

# Classes re-exported through mediapipe's drawing_styles module.
PoseLandmark = mp.solutions.drawing_styles.PoseLandmark
DrawingSpec = mp.solutions.drawing_styles.DrawingSpec
17
+
18
# Detected faces whose bounding box is smaller than this many pixels on its
# shorter side are dropped by generate_annotation; 0 or less disables the filter.
min_face_size_pixels: int = 64

# Line thickness and circle radius shared by every face feature spec below.
f_thick = 2
f_rad = 1


def _face_feature_spec(color):
    """Build a DrawingSpec in ``color`` using the shared face thickness and radius."""
    return DrawingSpec(color=color, thickness=f_thick, circle_radius=f_rad)


# One spec per facial feature; the left- and right-side colours mirror each other.
right_iris_draw = _face_feature_spec((10, 200, 250))
right_eye_draw = _face_feature_spec((10, 200, 180))
right_eyebrow_draw = _face_feature_spec((10, 220, 180))
left_iris_draw = _face_feature_spec((250, 200, 10))
left_eye_draw = _face_feature_spec((180, 200, 10))
left_eyebrow_draw = _face_feature_spec((180, 220, 10))
mouth_draw = _face_feature_spec((10, 180, 10))
head_draw = _face_feature_spec((10, 200, 10))
29
+
30
# Map every face-mesh edge that gets drawn to the DrawingSpec of the feature it
# belongs to. mp_face_mesh.FACEMESH_CONTOURS has all the items we care about; it
# is assembled per feature here so that each feature gets its own colour.
# The iris edge sets (FACEMESH_LEFT_IRIS / FACEMESH_RIGHT_IRIS) are intentionally
# not included: irises are drawn per landmark via iris_landmark_spec instead.
face_connection_spec = {
    edge: feature_spec
    for feature_edges, feature_spec in (
        (mp_face_mesh.FACEMESH_FACE_OVAL, head_draw),
        (mp_face_mesh.FACEMESH_LEFT_EYE, left_eye_draw),
        (mp_face_mesh.FACEMESH_LEFT_EYEBROW, left_eyebrow_draw),
        (mp_face_mesh.FACEMESH_RIGHT_EYE, right_eye_draw),
        (mp_face_mesh.FACEMESH_RIGHT_EYEBROW, right_eyebrow_draw),
        (mp_face_mesh.FACEMESH_LIPS, mouth_draw),
    )
    for edge in feature_edges
}

# Landmark index -> DrawingSpec for the two landmarks drawn as pupils
# (presumably the iris centres that refine_landmarks adds -- confirm against mediapipe).
iris_landmark_spec = {468: right_iris_draw, 473: left_iris_draw}
49
+
50
+
51
def draw_pupils(image, landmark_list, drawing_spec, halfwidth: int = 2):
    """Paint a small filled square onto ``image`` for each selected landmark.

    We have a custom function to draw the pupils because the mp.draw_landmarks
    method requires a parameter for all landmarks. Until our PR is merged into
    mediapipe, we need this separate method.

    Args:
        image: H x W x 3 array; it is modified in place.
        landmark_list: Object whose ``landmark`` attribute is a sequence of
            landmarks exposing normalised ``x``/``y`` and ``HasField`` for the
            optional ``visibility``/``presence`` fields.
        drawing_spec: Either a single DrawingSpec applied to every landmark, or
            a mapping from landmark index to DrawingSpec. Landmarks whose index
            is missing from the mapping (or maps to None) are skipped.
        halfwidth: Half the side length, in pixels, of the painted square.

    Raises:
        ValueError: If ``image`` is not three-dimensional or does not have
            exactly three channels.
        TypeError: If ``drawing_spec`` is neither a mapping nor a DrawingSpec.
    """
    if len(image.shape) != 3:
        raise ValueError("Input image must be H,W,C.")
    image_rows, image_cols, image_channels = image.shape
    if image_channels != 3:  # BGR channels
        raise ValueError('Input image must contain three channel bgr data.')

    for idx, landmark in enumerate(landmark_list.landmark):
        # Skip landmarks that mediapipe itself reports as unreliable.
        if (
                (landmark.HasField('visibility') and landmark.visibility < 0.9) or
                (landmark.HasField('presence') and landmark.presence < 0.5)
        ):
            continue
        # Skip landmarks that fall outside the image.
        if landmark.x >= 1.0 or landmark.x < 0 or landmark.y >= 1.0 or landmark.y < 0:
            continue
        image_x = int(image_cols * landmark.x)
        image_y = int(image_rows * landmark.y)

        if isinstance(drawing_spec, Mapping):
            landmark_spec = drawing_spec.get(idx)
            if landmark_spec is None:
                continue
            draw_color = landmark_spec.color
        elif isinstance(drawing_spec, DrawingSpec):
            draw_color = drawing_spec.color
        else:
            raise TypeError(
                "drawing_spec must be a DrawingSpec or a mapping of landmark index to DrawingSpec, "
                f"got {type(drawing_spec).__name__}."
            )

        # Clamp the start of each slice to 0: a negative start would be read as
        # "count from the end", producing an empty slice and silently dropping
        # pupils close to the top or left edge. Stops past the far edge are
        # already clipped by slicing.
        top = max(image_y - halfwidth, 0)
        left = max(image_x - halfwidth, 0)
        image[top:image_y + halfwidth, left:image_x + halfwidth, :] = draw_color
78
+
79
+
80
def reverse_channels(image):
    """Flip the channel order of an H,W,C array: RGB becomes BGR and BGR becomes RGB."""
    # Indexing the third axis with a step of -1 walks the channels backwards.
    # Fancy indexing such as image[:, :, [2, 1, 0]] would give the same values
    # but makes a copy of the data, which the slice does not.
    every_row = slice(None)
    every_col = slice(None)
    channels_backwards = slice(None, None, -1)
    return image[every_row, every_col, channels_backwards]
85
+
86
+
87
def _face_size_pixels(landmarks, img_width, img_height):
    """Return the shorter side, in pixels, of the bounding box around normalised ``landmarks``."""
    xs = [point.x for point in landmarks]
    ys = [point.y for point in landmarks]
    if not xs:
        # A face without landmarks has no extent.
        return 0.0
    return min((max(xs) - min(xs)) * img_width, (max(ys) - min(ys)) * img_height)


def generate_annotation(
        img_rgb,
        max_faces: int,
        min_confidence: float
):
    """Draw the facial features found in ``img_rgb`` onto a black canvas.

    Finds up to ``max_faces`` faces inside the provided input image. When the
    module-level ``min_face_size_pixels`` is greater than zero, faces whose
    bounding box is smaller than that many pixels on its shorter side are
    discarded before drawing.

    Args:
        img_rgb: H x W x 3 image array handed to mediapipe's FaceMesh.
        max_faces: Maximum number of faces to look for.
        min_confidence: Minimum detection confidence passed to FaceMesh.

    Returns:
        A new array with the shape and dtype of ``img_rgb``: all zeros except
        for the drawn face contours and pupils. If no face is detected the
        array is entirely zero.

    Raises:
        ValueError: If ``img_rgb`` is not three-dimensional or does not have
            exactly three channels.
    """
    # Validate before building the FaceMesh model. An explicit raise (rather
    # than an assert) keeps the check alive under ``python -O``.
    img_height, img_width, img_channels = img_rgb.shape
    if img_channels != 3:
        raise ValueError("Input image must contain three channel rgb data.")

    with mp_face_mesh.FaceMesh(
            static_image_mode=True,
            max_num_faces=max_faces,
            refine_landmarks=True,
            min_detection_confidence=min_confidence,
    ) as facemesh:
        results = facemesh.process(img_rgb).multi_face_landmarks

        if results is None:
            print("No faces detected in controlnet image for Mediapipe face annotator.")
            return numpy.zeros_like(img_rgb)

        # Filter faces that are too small.
        if min_face_size_pixels > 0:
            filtered_landmarks = [
                face for face in results
                if _face_size_pixels(face.landmark, img_width, img_height) >= min_face_size_pixels
            ]
        else:
            filtered_landmarks = list(results)

        # Annotations are drawn in BGR for some reason, but we don't need to flip a zero-filled image at the start.
        empty = numpy.zeros_like(img_rgb)

        # Draw detected faces: contours through mediapipe, pupils through our own helper.
        for face_landmarks in filtered_landmarks:
            mp_drawing.draw_landmarks(
                empty,
                face_landmarks,
                connections=face_connection_spec.keys(),
                landmark_drawing_spec=None,
                connection_drawing_spec=face_connection_spec
            )
            draw_pupils(empty, face_landmarks, iris_landmark_spec, 2)

        # Flip BGR back to RGB; copy so the caller gets a contiguous array instead of a reversed view.
        return reverse_channels(empty).copy()
0 commit comments