Paper: Geometry Guided Pose-Invariant Facial Expression Recognition
GitHub: https://github.com/FFZhang1231/Facial-expression-recognition
self.f = self.encoder(
    image=self.input_image
)
self.PE = self.encoder_pose(
    pose=self.pose,
    is_training=self.is_training
)
self.G = self.generator(
    f=self.f,
    latent_variable=self.PE,
    # y=self.expression,
    enable_tile_label=self.enable_tile_label,
    tile_ratio=self.tile_ratio
)
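For context, here is a minimal sketch of the placeholders these calls assume. The names follow the snippet above, but the exact shapes, the 136-dimensional landmark vector, and is_training being a plain Python flag are assumptions, not taken from the repo; the imports are the ones all the excerpts below rely on.

import numpy as np
import tensorflow as tf

# Hypothetical placeholder setup matching the calls above; shapes are assumptions.
self.input_image = tf.placeholder(
    tf.float32,
    [self.size_batch, self.size_image, self.size_image, self.num_input_channels],
    name='input_image'
)
# 68 landmarks flattened to (x, y) pairs -> 136 values (inferred from decoder_pose's final layer).
self.pose = tf.placeholder(tf.float32, [self.size_batch, 136], name='pose')
self.is_training = True  # assumed to be a plain Python bool, since discriminator_acc branches on it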
f(x): the identity encoder maps the input image to the feature vector f(x).
def encoder(self, image, reuse_variables=False):
    with tf.variable_scope("", reuse=reuse_variables):
        num_layers = int(np.log2(self.size_image)) - int(self.size_kernel / 2)
        current = image
        # conv layers with stride 2
        for i in range(num_layers):
            name = 'E_conv' + str(i)
            current = conv2d(
                input_map=current,
                num_output_channels=self.num_encoder_channels * (2 ** i),
                size_kernel=self.size_kernel,
                name=name
            )
            current = tf.nn.relu(current)
        # fully connected layer
        name = 'E_fc'
        current = fc(
            input_vector=tf.reshape(current, [self.size_batch, -1]),
            num_output_length=self.num_fx,
            name=name
        )
        # output: identity feature f(x), squashed to [-1, 1]
        return tf.nn.tanh(current)
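The helpers conv2d and fc are not shown in this post. A hedged sketch of what they plausibly look like, with signatures inferred from the call sites above (the repo's actual ops may differ in initialization and defaults):

# Hedged sketch of the custom ops used throughout; not the repo's definitive implementation.
def conv2d(input_map, num_output_channels, size_kernel=5, stride=2, name='conv2d'):
    with tf.variable_scope(name):
        w = tf.get_variable(
            'w', [size_kernel, size_kernel, input_map.get_shape()[-1], num_output_channels],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        b = tf.get_variable('b', [num_output_channels], initializer=tf.constant_initializer(0.0))
        conv = tf.nn.conv2d(input_map, w, strides=[1, stride, stride, 1], padding='SAME')
        return tf.nn.bias_add(conv, b)

def fc(input_vector, num_output_length, name='fc'):
    with tf.variable_scope(name):
        w = tf.get_variable(
            'w', [input_vector.get_shape()[1], num_output_length],
            initializer=tf.random_normal_initializer(stddev=0.02))
        b = tf.get_variable('b', [num_output_length], initializer=tf.constant_initializer(0.0))
        return tf.matmul(input_vector, w) + b

As a worked example of the layer count: with size_image = 128 and size_kernel = 5, num_layers = log2(128) - 2 = 5 stride-2 convolutions, so a 128x128 input is reduced to a 4x4 map before E_fc.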
f(g'): the geometry encoder maps the facial-landmark vector g' to the geometry feature f(g').
def encoder_pose(self, pose, is_training=True, reuse_variables=False,
                 num_hidden_layer_channels=(128, 64, 32), enable_bn=True):
    with tf.variable_scope("", reuse=reuse_variables):
        current = pose
        for i in range(len(num_hidden_layer_channels)):
            name = 'E_p_fc' + str(i)
            current = fc(
                input_vector=current,
                num_output_length=num_hidden_layer_channels[i],
                name=name
            )
            if enable_bn:
                name = 'E_p_bn' + str(i)
                current = tf.contrib.layers.batch_norm(
                    current,
                    scale=False,
                    is_training=is_training,
                    scope=name,
                    reuse=reuse_variables
                )
            current = tf.nn.relu(current)
        latent_variable = current
        return latent_variable
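Assuming 68 facial landmarks, the pose input is a 136-dimensional vector (one (x, y) pair per point); the three fc layers compress it to a 32-dimensional geometry code, and decoder_pose below mirrors the path back to 136 dimensions.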
Generator network G
On top of the adversarial network, geometric priors such as the facial-landmark feature f(g') are introduced.
The generator's inputs are the image feature vector f(x) and the geometry feature f(g'), which play the roles of [z, y] (latent code and condition).
def generator(self, f, latent_variable, reuse_variables=False, enable_tile_label=True, tile_ratio=1.0):
    with tf.variable_scope("", reuse=reuse_variables):
        num_layers = int(np.log2(self.size_image)) - int(self.size_kernel / 2)
        if enable_tile_label:
            duplicate = int(self.num_fx * tile_ratio / self.num_poses)
        else:
            duplicate = 1
        f = concat_label(f, latent_variable, duplicate=duplicate)
        size_mini_map = int(self.size_image / 2 ** num_layers)
        # fc layer
        name = 'G_fc'
        current = fc(
            input_vector=f,
            num_output_length=self.num_gen_channels * size_mini_map * size_mini_map,
            name=name
        )
        # reshape to cube for deconv
        current = tf.reshape(current, [-1, size_mini_map, size_mini_map, self.num_gen_channels])
        current = tf.nn.relu(current)
        # deconv layers with stride 2
        for i in range(num_layers):
            name = 'G_deconv' + str(i)
            current = deconv2d(
                input_map=current,
                output_shape=[self.size_batch,
                              size_mini_map * 2 ** (i + 1),
                              size_mini_map * 2 ** (i + 1),
                              int(self.num_gen_channels / 2 ** (i + 1))],
                size_kernel=self.size_kernel,
                name=name
            )
            current = tf.nn.relu(current)
        # two extra stride-1 deconv layers at full resolution
        name = 'G_deconv' + str(i + 1)
        current = deconv2d(
            input_map=current,
            output_shape=[self.size_batch,
                          self.size_image,
                          self.size_image,
                          int(self.num_gen_channels / 2 ** (i + 2))],
            size_kernel=self.size_kernel,
            stride=1,
            name=name
        )
        current = tf.nn.relu(current)
        name = 'G_deconv' + str(i + 2)
        current = deconv2d(
            input_map=current,
            output_shape=[self.size_batch,
                          self.size_image,
                          self.size_image,
                          self.num_input_channels],
            size_kernel=self.size_kernel,
            stride=1,
            name=name
        )
        # output: synthesized image in [-1, 1]
        return tf.nn.tanh(current)
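concat_label is another repo helper not reproduced in this post. From its two call sites (a 2-D feature vector here, a 4-D feature map in discriminator_att below), it plausibly tiles the label `duplicate` times and concatenates it onto the features; a hedged reconstruction:

# Hedged sketch of concat_label, inferred from its call sites; the repo's version may differ.
def concat_label(x, label, duplicate=1):
    label = tf.tile(label, [1, duplicate])  # repeat the label vector 'duplicate' times
    if len(x.get_shape()) == 2:
        # vector case: plain feature-axis concatenation
        return tf.concat([x, label], axis=1)
    elif len(x.get_shape()) == 4:
        # feature-map case: broadcast the label over spatial positions, then concat on channels
        batch, height, width = x.get_shape().as_list()[:3]
        label = tf.reshape(label, [batch, 1, 1, -1])
        label = tf.tile(label, [1, height, width, 1])
        return tf.concat([x, label], axis=3)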
Reconstructing the geometric landmarks
self.PD = self.decoder_pose(
    latent_variable=self.PE,
    is_training=self.is_training
)
def decoder_pose(self, latent_variable, is_training=True, reuse_variables=False,
                 num_hidden_layer_channels=(64, 128, 136), enable_bn=True):
    with tf.variable_scope("", reuse=reuse_variables):
        current = latent_variable
        for i in range(len(num_hidden_layer_channels)):
            name = 'D_p_fc' + str(i)
            current = fc(
                input_vector=current,
                num_output_length=num_hidden_layer_channels[i],
                name=name
            )
            if enable_bn:
                name = 'D_p_bn' + str(i)
                current = tf.contrib.layers.batch_norm(
                    current,
                    scale=False,
                    is_training=is_training,
                    scope=name,
                    reuse=reuse_variables
                )
            current = tf.nn.relu(current)
        p_output = current  # 136 values: the reconstructed 68 (x, y) landmarks
        return p_output
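The decoder exists so that the geometry code PE stays informative: PD should reproduce the input landmarks. The repo's exact loss is not shown in this post; a plausible L2 reconstruction term would look like this (hypothetical name):

# Hypothetical landmark-reconstruction loss; the repo may use a different norm or weighting.
self.loss_pose_reconstruction = tf.reduce_mean(tf.square(self.PD - self.pose))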
3. Classifying the expression and pose categories of the input image
self.D_input_ex_logits, self.D_input_pose_logits = self.discriminator_acc(
    image=self.input_image,
    is_training=self.is_training
)
def discriminator_acc(self, image, is_training=True, reuse_variables=False):
    current = image
    with tf.variable_scope("RS", reuse=reuse_variables):
        with tf.variable_scope("my_resnet"):
            # NOTE: the original passed pooling=max (the Python builtin); Keras expects the string 'max'.
            base_model = tf.keras.applications.ResNet50(
                include_top=False, weights='imagenet', input_tensor=current,
                input_shape=None, pooling='max')
            my_output = base_model(current)
        name = 'D_acc_fc1'
        current1 = fc(
            input_vector=tf.reshape(my_output, [self.size_batch, -1]),
            num_output_length=1024,
            name=name
        )
        current1 = lrelu(current1)
        if self.is_training:
            current1 = tf.nn.dropout(current1, 0.5)
        # expression logits
        name = 'D_acc_fc2'
        current2 = fc(
            input_vector=current1,
            num_output_length=self.num_categories,
            name=name
        )
        # pose logits
        name = 'D_acc_fc3'
        current3 = fc(
            input_vector=current1,
            num_output_length=self.num_poses,
            name=name
        )
        return current2, current3
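The two heads would be trained with standard softmax cross-entropy against the ground-truth expression and pose labels; a hedged sketch (the label tensors self.expression and self.pose_label are assumed names, not from the snippet):

# Hypothetical classification losses for the two heads above.
self.loss_expression = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=self.expression, logits=self.D_input_ex_logits))
self.loss_pose_cls = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=self.pose_label, logits=self.D_input_pose_logits))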
Deciding the identity class (0 or 1, fake or real) from f(x)
# discriminator on identity
self.D_f, self.D_f_logits = self.discriminator_i(
    f=self.f,
    is_training=self.is_training
)
def discriminator_i(self, f, is_training=True, reuse_variables=False,
                    num_hidden_layer_channels=(64, 32, 16), enable_bn=True):
    with tf.variable_scope("", reuse=reuse_variables):
        current = f
        # fully connected layers
        for i in range(len(num_hidden_layer_channels)):
            name = 'D_f_fc' + str(i)
            current = fc(
                input_vector=current,
                num_output_length=num_hidden_layer_channels[i],
                name=name
            )
            if enable_bn:
                name = 'D_f_bn' + str(i)
                current = tf.contrib.layers.batch_norm(
                    current,
                    scale=False,
                    is_training=is_training,
                    scope=name,
                    reuse=reuse_variables
                )
            current = tf.nn.relu(current)
        # output layer: single logit
        name = 'D_f_fc' + str(i + 1)
        current = fc(
            input_vector=current,
            num_output_length=1,
            name=name
        )
        return tf.nn.sigmoid(current), current
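Di regularizes the distribution of f(x) adversarially, in the style of an adversarial autoencoder: samples from a chosen prior count as "real" and encoder outputs as "fake". A hedged sketch, where the uniform-style prior placeholder and loss names are assumptions:

# Hypothetical prior-matching setup for the identity feature; the prior choice is an assumption.
self.prior_f = tf.placeholder(tf.float32, [self.size_batch, self.num_fx], name='prior_f')
D_prior, D_prior_logits = self.discriminator_i(f=self.prior_f, is_training=self.is_training,
                                               reuse_variables=True)
self.D_i_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=D_prior_logits, labels=tf.ones_like(D_prior_logits)) +
    tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_f_logits, labels=tf.zeros_like(self.D_f_logits)))
self.E_i_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_f_logits, labels=tf.ones_like(self.D_f_logits)))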
self.D_G, self.D_G_logits = self.discriminator_att(
    image=self.G,
    pose=self.pose,
    is_training=self.is_training
)
def discriminator_att(self, image, pose, is_training=True, reuse_variables=False,
                      num_hidden_layer_channels=(16, 32, 64, 128), enable_bn=True):
    with tf.variable_scope("", reuse=reuse_variables):
        num_layers = len(num_hidden_layer_channels)
        current = image
        # conv layers with stride 2
        for i in range(num_layers):
            name = 'D_img_conv' + str(i)
            current = conv2d(
                input_map=current,
                num_output_channels=num_hidden_layer_channels[i],
                size_kernel=self.size_kernel,
                name=name
            )
            if enable_bn:
                name = 'D_img_bn' + str(i)
                current = tf.contrib.layers.batch_norm(
                    current,
                    scale=False,
                    is_training=is_training,
                    scope=name,
                    reuse=reuse_variables
                )
            current = tf.nn.relu(current)
            if i == 0:
                # condition the first feature map on the pose label
                current = concat_label(current, pose, int(self.num_categories / self.num_poses))
        # fully connected layers
        name = 'D_img_fc1'
        current = fc(
            input_vector=tf.reshape(current, [self.size_batch, -1]),
            num_output_length=1024,
            name=name
        )
        current = lrelu(current)
        name = 'D_img_fc2'
        current1 = fc(
            input_vector=current,
            num_output_length=1,
            name=name
        )
        # return probability and logit; the original returned the 1024-d 'current' as the
        # second value, which looks like a bug, so the single logit current1 is returned here
        return tf.nn.sigmoid(current1), current1
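Datt is then evaluated on both real inputs and generated images to form the usual GAN objective; a hedged sketch (variable names are assumptions):

# Hypothetical adversarial losses for the attribute discriminator.
D_real, D_real_logits = self.discriminator_att(image=self.input_image, pose=self.pose,
                                               is_training=self.is_training, reuse_variables=True)
self.D_att_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=D_real_logits, labels=tf.ones_like(D_real_logits)) +
    tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_G_logits, labels=tf.zeros_like(self.D_G_logits)))
self.G_att_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_G_logits, labels=tf.ones_like(self.D_G_logits)))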
The geometry embedding network E extracts the geometric feature vector.
The generator G has an encoder-decoder structure and synthesizes new face images. The encoder Genc takes a face image with an arbitrary expression and pose and learns a mapping from it to an identity representation. This representation is then concatenated with the geometric information and fed into Gdec. The output of the decoder Gdec is a synthesized face image with the target expression and pose; the learned identity representation is the bridge between Genc and Gdec.
Datt disentangles pose, expression, and identity from the face image in a latent space, so that the attributes (expression, pose) can be changed while the identity is preserved.
Di controls the distribution of the identity features.
The classifier Cexp pushes the generated face image to have the same expression as the real input face image.
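Cexp can be realized by running the expression head above on the generated image and penalizing disagreement with the input's expression label; a hedged sketch reusing discriminator_acc (the loss name and self.expression label are assumptions):

# Hypothetical expression-consistency loss on the generated image.
G_ex_logits, _ = self.discriminator_acc(image=self.G, is_training=self.is_training,
                                        reuse_variables=True)
self.C_exp_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=self.expression, logits=G_ex_logits))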