update MobileFaceNet using static layer names and add training result
leondgarse committed May 15, 2023
1 parent 2f1e332 commit 2dae8e6
Showing 11 changed files with 107 additions and 137 deletions.
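The headline change below is the MobileFaceNet rewrite in backbones/mobile_facenet.py, which swaps Keras' auto-generated layer names for deterministic, hand-assigned ones (stem_1_conv, stack1_block1_1_conv, ...). A minimal sketch of what static names enable; the checkpoint filename and the by-name loading workflow are illustrative assumptions, not part of this commit:

```python
from backbones.mobile_facenet import MobileFaceNet

model = MobileFaceNet(use_se=False, include_top=True)

# Static names are stable across rebuilds and refactors, so layers can be
# looked up directly by name ...
print(model.get_layer("stack1_block1_1_conv").name)

# ... and weights can be matched by layer name instead of by creation order.
# "checkpoint.h5" is a hypothetical file.
model.load_weights("checkpoint.h5", by_name=True)
```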
4 changes: 2 additions & 2 deletions IJB_evals.py
@@ -400,7 +400,7 @@ def run_model_test_bunch(self):
return scores, names

def run_model_test_1N(self, npoints=100):
fars_cal = [10 ** ii for ii in np.arange(-4, 0, 4 / npoints)] + [1] # plot in range [1e-4, 1]
fars_cal = [10**ii for ii in np.arange(-4, 0, 4 / npoints)] + [1] # plot in range [1e-4, 1]
fars_show_idx = np.arange(len(fars_cal))[:: npoints // 4] # npoints=100, fars_show=[0.0001, 0.001, 0.01, 0.1, 1.0]

g1_templates, g1_ids, g2_templates, g2_ids, probe_mixed_templates, probe_mixed_ids = extract_gallery_prob_data(
@@ -501,7 +501,7 @@ def plot_roc_and_calculate_tpr(scores, names=None, label=None):
plt.plot(fpr_dict[name], tpr_dict[name], lw=1, label="[%s (AUC = %0.4f%%)]" % (name, roc_auc_dict[name] * 100))
title = "ROC on IJB" + name.split("IJB")[-1][0] if "IJB" in name else "ROC on IJB"

plt.xlim([10 ** -6, 0.1])
plt.xlim([10**-6, 0.1])
plt.xscale("log")
plt.xticks(x_labels)
plt.xlabel("False Positive Rate")
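For reference, a standalone sketch of the FAR grid built by the fars_cal line above (plain numpy; the printed values match the inline comment):

```python
import numpy as np

npoints = 100
fars_cal = [10**ii for ii in np.arange(-4, 0, 4 / npoints)] + [1]  # 100 log-spaced points plus 1.0
fars_show_idx = np.arange(len(fars_cal))[:: npoints // 4]  # every 25th index: [0, 25, 50, 75, 100]

print([float("%.4f" % fars_cal[ii]) for ii in fars_show_idx])  # [0.0001, 0.001, 0.01, 0.1, 1.0]
```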
1 change: 1 addition & 0 deletions README.md
@@ -42,6 +42,7 @@
| [Resnet34](https://github.com/leondgarse/Keras_insightface/releases/download/v1.0.0/resnet34_MXNET_E_SGD_REG_1e3_on_batch_true_lr1e1_random0_arc_S32_E1_BS512_casia_basic_agedb_30_epoch_36_0.949500.h5) | [CASIA, E40](https://github.com/leondgarse/Keras_insightface/discussions/36) | 0.994667 | 0.949143 | 0.9495 | | |
| [Mobilenet emb256](https://drive.google.com/file/d/1i0B6Hy1clGgfeOYtUXVPNveDEe2DTIBa/view?usp=sharing) | [Emore,E110](https://github.com/leondgarse/Keras_insightface/discussions/15#discussioncomment-286398) | 0.996000 | 0.951714 | 0.959333 | 0.887147 | 0.911745 |
| [Mobilenet distill](https://drive.google.com/file/d/1yUjCG5rMeVCKTSPbST2F9BrRRlkDPzEA/view?usp=sharing) | [MS1MV3,E50](https://github.com/leondgarse/Keras_insightface/discussions/30) | 0.997333 | 0.969 | 0.975333 | 0.91889 | 0.940328 |
| [se_mobile_facenet](https://github.com/leondgarse/Keras_insightface/releases/download/v1.0.0/se_mobilefacenet_pointwise_GDC_arc_emb256_dr0_sgd_bs512_ms1m_rand_0_bnm09_bne1e4_cos16_batch_float16_basic_model_latest.h5) | [MS1MV3,E50](https://github.com/leondgarse/Keras_insightface/discussions/15#discussioncomment-5904827) | 0.99717 | 0.971803 | 0.969333 | 0.921811 | 0.940891 |
| [Ghostnet,S2,swish](https://github.com/leondgarse/Keras_insightface/releases/download/v1.0.0/ghostnet_130_960_s2_swish_se_swish_imagenet_bs512_lr01_test_basic_agedb_30_epoch_48_0.973667.h5) | [MS1MV3,E50](https://github.com/leondgarse/Keras_insightface/discussions/15#discussioncomment-322997) | 0.997333 | 0.966143 | 0.973667 | 0.923661 | 0.941402 |
| [Ghostnet,S1,swish](https://github.com/leondgarse/Keras_insightface/releases/download/v1.0.0/TT_ghostnet_s1_swish_GDC_lr003125_bs160_test_E50_arc_basic_agedb_30_epoch_17_0.978167.h5) | [MS1MV3,E67](https://github.com/leondgarse/Keras_insightface/discussions/15#discussioncomment-583252) | 0.997500 | 0.981429 | 0.978167 | 0.93739 | 0.953163 |
| [EfficientNetV2B0](https://github.com/leondgarse/Keras_insightface/releases/download/v1.0.0/TT_efv2_b0_swish_GDC_arc_emb512_dr0_sgd_l2_5e4_bs512_ms1m_randaug_cutout_bnm09_bne1e4_cos16_batch_float16_E50_arc_sgd_LA_basic_agedb_30_epoch_17_0.977333.h5) | [MS1MV3,E67](https://github.com/leondgarse/Keras_insightface/discussions/42) | 0.997833 | 0.976571 | 0.977333 | **0.940701** | **0.955259** |
197 changes: 83 additions & 114 deletions backbones/mobile_facenet.py
@@ -1,123 +1,92 @@
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (
Input,
Conv2D,
BatchNormalization,
Layer,
PReLU,
SeparableConv2D,
DepthwiseConv2D,
add,
Flatten,
Dense,
Dropout,
GlobalAveragePooling2D,
Reshape,
Multiply,
)
from tensorflow.keras.models import Model

"""Building Block Functions"""


def se_block(inputs, reduction=16):
channel_axis = 1 if K.image_data_format() == "channels_first" else -1
filters = inputs.shape[channel_axis]
nn = GlobalAveragePooling2D()(inputs)
nn = Reshape((1, 1, filters))(nn)
nn = Conv2D(filters // reduction, kernel_size=1)(nn)
nn = PReLU(shared_axes=[1, 2])(nn)
nn = Conv2D(filters, kernel_size=1, activation="sigmoid")(nn)
nn = Multiply()([inputs, nn])
import tensorflow as tf
from tensorflow.keras import layers, models, initializers


def se_block(inputs, reduction=16, name=""):
input_channels = inputs.shape[-1]
nn = layers.GlobalAveragePooling2D(keepdims=True)(inputs)
# nn = Reshape((1, 1, input_channels))(nn)
nn = layers.Conv2D(input_channels // reduction, kernel_size=1, name=name + "1_conv")(nn)
nn = layers.PReLU(shared_axes=[1, 2], alpha_initializer=initializers.Constant(0.25), name=name + "prelu")(nn)
nn = layers.Conv2D(input_channels, kernel_size=1, name=name + "2_conv")(nn)
nn = layers.Activation(activation="sigmoid")(nn)
nn = layers.Multiply(name=name + "out")([inputs, nn])
return nn


def se_block_2(inputs, reduction=16):
channel_axis = 1 if K.image_data_format() == "channels_first" else -1
filters = inputs.shape[channel_axis]
se = GlobalAveragePooling2D()(inputs)
se = Dense(filters // reduction, activation="PReLU", use_bias=False)(se)
se = Dense(filters, activation="sigmoid", use_bias=False)(se)
# if K.image_data_format() == 'channels_first':
# se = Permute((3, 1, 2))(se)
x = Multiply()([inputs, se])
return x


def conv_block(inputs, filters, kernel_size, strides, padding):
channel_axis = 1 if K.image_data_format() == "channels_first" else -1
Z = Conv2D(filters, kernel_size, strides=strides, padding=padding, use_bias=False)(inputs)
Z = BatchNormalization(axis=channel_axis)(Z)
A = PReLU(shared_axes=[1, 2])(Z)
return A


def separable_conv_block(inputs, filters, kernel_size, strides):
channel_axis = 1 if K.image_data_format() == "channels_first" else -1
Z = SeparableConv2D(filters, kernel_size, strides=strides, padding="same", use_bias=False)(inputs)
Z = BatchNormalization(axis=channel_axis)(Z)
A = PReLU(shared_axes=[1, 2])(Z)
return A


def bottleneck(inputs, filters, kernel, t, s, r=False, se=False):
channel_axis = 1 if K.image_data_format() == "channels_first" else -1
tchannel = K.int_shape(inputs)[channel_axis] * t
Z1 = conv_block(inputs, tchannel, 1, 1, "same")
Z1 = DepthwiseConv2D(kernel, strides=s, padding="same", depth_multiplier=1, use_bias=False)(Z1)
Z1 = BatchNormalization(axis=channel_axis)(Z1)
A1 = PReLU(shared_axes=[1, 2])(Z1)
Z2 = Conv2D(filters, 1, strides=1, padding="same", use_bias=False)(A1)
Z2 = BatchNormalization(axis=channel_axis)(Z2)
if se:
Z2 = se_block(Z2)
if r:
Z2 = add([Z2, inputs])
return Z2


def inverted_residual_block(inputs, filters, kernel, t, strides, n, se=False):
Z = bottleneck(inputs, filters, kernel, t, strides, se=se)
for i in range(1, n):
Z = bottleneck(Z, filters, kernel, t, 1, True, se=se)
return Z


def linear_GD_conv_block(inputs, kernel_size, strides):
channel_axis = 1 if K.image_data_format() == "channels_first" else -1
Z = DepthwiseConv2D(kernel_size, strides=strides, padding="valid", depth_multiplier=1, use_bias=False)(inputs)
Z = BatchNormalization(axis=channel_axis)(Z)
return Z


def mobile_facenet(emb_shape=128, input_shape=(112, 112, 3), dropout=1, name="mobile_facenet", weight_file=None, use_se=False, include_top=True):
channel_axis = 1 if K.image_data_format() == "channels_first" else -1
if K.image_data_format() == "channels_first":
X = Input(shape=(input_shape[-1], input_shape[0], input_shape[1]))
def conv_bn_prelu(inputs, filters=-1, kernel_size=1, strides=1, padding="SAME", use_depthwise=False, use_separable=False, activation="prelu", name=""):
filters = filters if filters > 0 else inputs.shape[-1]
if use_depthwise:
nn = layers.DepthwiseConv2D(kernel_size, strides=strides, padding=padding, use_bias=False, name=name + "depthwise")(inputs)
elif use_separable:
nn = layers.SeparableConv2D(filters, kernel_size, strides=strides, padding="same", use_bias=False, name=name + "separable")(inputs)
# depthwise = layers.DepthwiseConv2D(kernel_size, strides=strides, padding=padding, use_bias=False, name=name + "depthwise")(inputs)
# nn = layers.Conv2D(filters, kernel_size=1, strides=1, padding="VALID", use_bias=False, name=name + "pointwise")(depthwise)
else:
X = Input(shape=input_shape)
M = conv_block(X, 64, 3, 2, "same") # Output Shape: (56, 56, 64)
M = separable_conv_block(M, 64, 3, 1) # (56, 56, 64)
M = inverted_residual_block(M, 64, 3, t=2, strides=2, n=5, se=use_se) # (28, 28, 64)
M = inverted_residual_block(M, 128, 3, t=4, strides=2, n=1, se=use_se) # (14, 14, 128)
M = inverted_residual_block(M, 128, 3, t=2, strides=1, n=6, se=use_se) # (14, 14, 128)
M = inverted_residual_block(M, 128, 3, t=4, strides=2, n=1, se=use_se) # (7, 7, 128)
M = inverted_residual_block(M, 128, 3, t=2, strides=1, n=2, se=use_se) # (7, 7, 128)
nn = layers.Conv2D(filters, kernel_size, strides=strides, padding=padding, use_bias=False, name=name + "conv")(inputs)

nn = layers.BatchNormalization(name=name + "bn")(nn)

if activation is not None and activation.lower() == "prelu":
nn = layers.PReLU(shared_axes=[1, 2], alpha_initializer=initializers.Constant(0.25), name=name + "prelu")(nn)
elif activation is not None:
nn = layers.Activation(activation=activation, name=name + activation)(nn)
return nn


def bottleneck(inputs, filters, expand_ratio=1, kernel_size=1, strides=1, use_residual=False, use_se=False, name=""):
hidden_channels = int(inputs.shape[-1] * expand_ratio)

nn = conv_bn_prelu(inputs, hidden_channels, name=name + "1_")
nn = conv_bn_prelu(nn, kernel_size=kernel_size, strides=strides, use_depthwise=True, name=name + "2_")
nn = conv_bn_prelu(nn, filters, activation=None, name=name + "3_")

nn = se_block(nn, name=name + "se_") if use_se else nn
nn = layers.Add()([inputs, nn]) if use_residual else nn
return nn


def MobileFaceNet(
num_blocks=[5, 1, 6, 1, 2],
out_channels=[64, 128, 128, 128, 128],
strides=[2, 2, 1, 2, 1],
expand_ratios=[2, 4, 2, 4, 2],
use_se=False,
emb_shape=256,
input_shape=(112, 112, 3),
dropout=0,
pretrained=None,
include_top=False,
name="mobile_facenet",
):
inputs = layers.Input(shape=input_shape) # (112, 112, 3)
nn = conv_bn_prelu(inputs, filters=64, kernel_size=3, strides=2, name="stem_1_") # (56, 56, 64)
nn = conv_bn_prelu(nn, filters=64, kernel_size=3, strides=1, use_separable=True, name="stem_2_") # (56, 56, 64)

for id, (num_block, out_channel, stride, expand_ratio) in enumerate(zip(num_blocks, out_channels, strides, expand_ratios)):
stack_name = "stack{}_".format(id + 1)
for block_id in range(num_block):
cur_strides = stride if block_id == 0 else 1
use_residual = False if block_id == 0 else True
block_name = stack_name + "block{}_".format(block_id + 1)
nn = bottleneck(nn, out_channel, expand_ratio, kernel_size=3, strides=cur_strides, use_residual=use_residual, use_se=use_se, name=block_name)

if include_top:
"""pointwise_conv"""
nn = conv_bn_prelu(nn, filters=512, name="header_pointwise_")

""" GDC """
M = Conv2D(512, 1, use_bias=False)(M) # (7, 7, 512)
M = BatchNormalization(axis=channel_axis)(M)
M = PReLU(shared_axes=[1, 2])(M)
M = DepthwiseConv2D(int(M.shape[1]), depth_multiplier=1, use_bias=False)(M) # (1, 1, 512)
M = BatchNormalization(axis=channel_axis)(M)
nn = layers.DepthwiseConv2D(nn.shape[1], use_bias=False, name="header_gdc_depthwise")(nn)
nn = layers.BatchNormalization(name="header_gdc_bn")(nn)

if dropout > 0 and dropout < 1:
M = Dropout(dropout)(M)
M = Conv2D(emb_shape, 1, use_bias=False, activation=None)(M)
M = Flatten()(M)
M = BatchNormalization(axis=channel_axis, name="embedding")(M)

model = Model(inputs=X, outputs=M, name=name)
if weight_file:
model.load_weights(weight_file)
nn = layers.Dropout(dropout)(nn)
nn = layers.Conv2D(emb_shape, 1, use_bias=False, name="header_gdc_post_conv")(nn)
nn = layers.Flatten()(nn)
nn = layers.BatchNormalization(name="pre_embedding")(nn)
nn = layers.Activation("linear", dtype="float32", name="embedding")(nn)

model = models.Model(inputs=inputs, outputs=nn, name=name)
if pretrained:
model.load_weights(pretrained)
return model
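A short usage sketch of the rewritten builder, using only the signature above; the printed shapes follow from the commented output shapes in the code and are illustrative:

```python
from backbones.mobile_facenet import MobileFaceNet

# Backbone only (include_top=False is the default): ends at the last bottleneck stack.
backbone = MobileFaceNet(input_shape=(112, 112, 3))
print(backbone.output_shape)  # (None, 7, 7, 128)

# Full model: pointwise conv + GDC head, producing a 256-d float32 embedding.
model = MobileFaceNet(emb_shape=256, use_se=True, include_top=True)
print(model.output_shape)  # (None, 256)
```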
2 changes: 1 addition & 1 deletion backbones/resnet.py
@@ -9,7 +9,7 @@


def batchnorm_with_activation(inputs, activation="relu", zero_gamma=False, name=""):
"""Performs a batch normalization followed by an activation. """
"""Performs a batch normalization followed by an activation."""
bn_axis = 3 if K.image_data_format() == "channels_last" else 1
gamma_initializer = tf.zeros_initializer() if zero_gamma else tf.ones_initializer()
nn = keras.layers.BatchNormalization(
4 changes: 2 additions & 2 deletions backbones/vargface.py
@@ -11,7 +11,7 @@


def hard_swish(inputs, name=None):
""" `out = xx * relu6(xx + 3) / 6`, arxiv: https://arxiv.org/abs/1905.02244 """
"""`out = xx * relu6(xx + 3) / 6`, arxiv: https://arxiv.org/abs/1905.02244"""
return keras.layers.Multiply(name=name)([inputs, tf.nn.relu6(inputs + 3) / 6])


@@ -31,7 +31,7 @@ def activation_by_name(inputs, activation="relu", name=None):


def batchnorm_with_activation(inputs, activation="relu", zero_gamma=False, epsilon=BATCH_NORM_EPSILON, name=None):
""" Performs a batch normalization followed by an activation. """
"""Performs a batch normalization followed by an activation."""
bn_axis = -1 if K.image_data_format() == "channels_last" else 1
gamma_initializer = tf.zeros_initializer() if zero_gamma else tf.ones_initializer()
nn = keras.layers.BatchNormalization(
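A quick standalone check of the hard_swish identity documented above (plain TensorFlow; the sample inputs are arbitrary):

```python
import tensorflow as tf

xx = tf.constant([-4.0, -1.0, 0.0, 1.0, 4.0])
out = xx * tf.nn.relu6(xx + 3) / 6  # out = xx * relu6(xx + 3) / 6
print(out.numpy())  # approx. [-0.0, -0.333, 0.0, 0.667, 4.0]; zero for xx <= -3, identity for xx >= 3
```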
6 changes: 3 additions & 3 deletions data_distiller.py
@@ -128,7 +128,7 @@ def __init__(self, data_path, model_file=None, dest_file=None, save_npz=False, b
print(">>>> Output:", self.dest_file)

def __init_ds_model_dest__(self):
""" Init dataset """
"""Init dataset"""
image_names, image_classes, _, classes, dataset_pickle_file_src = pre_process_folder(self.data_path)
print(">>>> Image length: %d, Image class length: %d, classes: %d" % (len(image_names), len(image_classes), classes))
if self.limit > 0:
@@ -179,7 +179,7 @@ def __extract_emb_gen__(self):
yield imm.numpy(), label.numpy(), emb

def __save_to_npz__(self):
""" Extract embeddings """
"""Extract embeddings"""
steps = int(np.ceil(self.total // self.batch_size)) + 1
image_names, image_classes, embeddings = [], [], []
for imm, label, emb in tqdm(self.emb_gen, self.tqdm_desc, total=steps):
@@ -190,7 +190,7 @@ def __save_to_tfrecord_by_batch__(self):
np.savez_compressed(self.dest_file, image_names=image_names, image_classes=image_classes, embeddings=embeddings)

def __save_to_tfrecord_by_batch__(self):
""" Encode feature definations, save also `classes` and `emb_shape` """
"""Encode feature definations, save also `classes` and `emb_shape`"""
self.encode_base_info = {
"classes": tf.train.Feature(int64_list=tf.train.Int64List(value=[self.classes])),
"emb_shape": tf.train.Feature(int64_list=tf.train.Int64List(value=[self.emb_shape])),
2 changes: 1 addition & 1 deletion eval_folder.py
@@ -180,7 +180,7 @@ def plot_tpr_far(score, label, new_figure=True, label_prefix=""):
if label_prefix and len(label_prefix) > 0:
label = label_prefix + " " + label
plt.plot(fpr, tpr, lw=1, label=label)
plt.xlim([10 ** -6, 0.1])
plt.xlim([10**-6, 0.1])
plt.xscale("log")
plt.xticks(fpr_show)
plt.xlabel("False Positive Rate")
4 changes: 2 additions & 2 deletions face_detector.py
@@ -107,7 +107,7 @@ def show_result(self, image, bbs, pps=[], ccs=[]):


class YoloV5FaceDetector(BaseDetector):
""" Yolov5-face Ported from https://github.com/deepcam-cn/yolov5-face """
"""Yolov5-face Ported from https://github.com/deepcam-cn/yolov5-face"""

def __init__(self, model_path=DEFAULT_DETECTOR, anchors=DEFAULT_ANCHORS, strides=DEFAULT_STRIDES):
if isinstance(model_path, str) and model_path.startswith("http"):
@@ -177,7 +177,7 @@ def __call__(self, image, max_output_size=15, iou_threshold=0.45, score_threshol


class SCRFD(BaseDetector):
""" SCRFD from https://github.com/deepinsight/insightface """
"""SCRFD from https://github.com/deepinsight/insightface"""

def __init__(self, det_shape=640):
self.model = self.download_and_prepare_det()
8 changes: 4 additions & 4 deletions losses.py
@@ -13,11 +13,11 @@ def __init__(self, power=2, scale=0.4, scale_all=1.0, from_logits=False, label_s
self.power, self.scale, self.from_logits, self.label_smoothing = power, scale, from_logits, label_smoothing
self.scale_all = scale_all
if power != 1 and scale == 0:
self.logits_reduction_func = lambda xx: xx ** power
self.logits_reduction_func = lambda xx: xx**power
elif power == 1 and scale != 0:
self.logits_reduction_func = lambda xx: xx * scale
else:
self.logits_reduction_func = lambda xx: (xx ** power + xx * scale) / 2
self.logits_reduction_func = lambda xx: (xx**power + xx * scale) / 2

def call(self, y_true, y_pred):
# margin_soft = tf.where(tf.cast(y_true, dtype=tf.bool), (y_pred ** self.power + y_pred * self.scale) / 2, y_pred)
@@ -231,7 +231,7 @@ def call(self, y_true, norm_logits):
# [MagFace: A Universal Representation for Face Recognition and Quality Assessment](https://arxiv.org/pdf/2103.06627.pdf)
@keras.utils.register_keras_serializable(package="keras_insightface")
class MagFaceLoss(ArcfaceLossSimple):
""" Another set for fine-tune is: min_feature_norm, max_feature_norm, min_margin, max_margin, regularizer_loss_lambda = 1, 51, 0.45, 1, 5 """
"""Another set for fine-tune is: min_feature_norm, max_feature_norm, min_margin, max_margin, regularizer_loss_lambda = 1, 51, 0.45, 1, 5"""

def __init__(
self,
@@ -254,7 +254,7 @@ def __init__(
self.min_margin, self.max_margin = min_margin, max_margin
self.use_cosface_margin, self.curricular_hard_scale = use_cosface_margin, curricular_hard_scale
self.margin_scale = (max_margin - min_margin) / (max_feature_norm - min_feature_norm)
self.regularizer_loss_scale = 1.0 / (self.max_feature_norm ** 2)
self.regularizer_loss_scale = 1.0 / (self.max_feature_norm**2)
self.use_curricular_scale = False
self.epislon = 1e-3
if curricular_hard_scale >= 0:
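For intuition, a tiny numeric sketch of the three logits_reduction_func branches defined above (standalone Python; the example values are arbitrary):

```python
power, scale, xx = 2, 0.4, 0.8  # xx: e.g. a positive-class cosine logit

print(xx**power)                     # ≈ 0.64, branch for power != 1 and scale == 0
print(xx * scale)                    # ≈ 0.32, branch for power == 1 and scale != 0
print((xx**power + xx * scale) / 2)  # ≈ 0.48, combined branch (the default power=2, scale=0.4)
```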