Gender Classification from Faces with TensorFlow

Data Collection

Training data – the Adience dataset

The Adience dataset is sourced from Flickr albums, taken by users with iPhones or other smartphones, and is intended for unfiltered, in-the-wild age and gender estimation. It also ships with facial landmark annotations, and contains 2,284 subjects and 26,580 images.

Adience dataset download: http://www.openu.ac.il/home/hassner/Adience/data.html#agegender
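
The training code later in this post walks a global data_set of (image_path, gender_label) pairs (the preprocessing loop further below, by contrast, iterates over plain image paths). Here is a minimal sketch of one plausible way to build those pairs; the fold file names, column names, and aligned/ directory layout are assumptions based on the standard Adience download, so adjust them to match yours.

import csv
import os

def load_adience_folds(root, fold_files):
    # Build (image_path, label) pairs; label 0 = female, 1 = male (an
    # arbitrary mapping that must match the one-hot encoding used later).
    data_set = []
    for fold in fold_files:
        with open(os.path.join(root, fold)) as f:
            for row in csv.DictReader(f, delimiter='\t'):
                gender = row['gender'].strip()
                if gender not in ('f', 'm'):
                    continue  # skip faces without a usable gender label
                img = 'landmark_aligned_face.%s.%s' % (row['face_id'], row['original_image'])
                data_set.append((os.path.join(root, 'aligned', row['user_id'], img),
                                 0 if gender == 'f' else 1))
    return data_set

data_set = load_adience_folds('Adience', ['fold_%d_data.txt' % i for i in range(5)])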

Preprocessing

Now that we have collected many images containing faces, how should we preprocess them?

  • Locate the face in each image
  • Align the face by rotation so it is upright

Main implementation code

# 1. Use the dlib library to locate the face and its corresponding 68
#    landmark points.
import cv2
import dlib
import numpy

detector = dlib.get_frontal_face_detector()
# Assumes the standard dlib 68-point model file is available locally.
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

for img_name in data_set:
    image = cv2.imread(img_name)   # imread already returns 3-channel BGR, so no BGRA conversion is needed
    rects = detector(image, 1)     # detect faces
    if len(rects) >= 1:            # at least one face found
        feats, landmark = get_fea_points(rects, image)
        rotImg = face_align(image, src_landmark[ALIGN_POINTS], landmark[ALIGN_POINTS])
        if rotImg is not None:
            newrects = detector(rotImg, 1)
            if len(newrects) >= 1:
                left, top, right, bottom = get_left_right_top_bottom(newrects)
                save_roi_image(rotImg, left, right, top, bottom, img_name)
            else:
                print("align not found face")
    else:
        print("not found face")

# 2. Align the face: if it is tilted, rotate it upright. Step 1 already gave
#    us the 68 landmarks; here only the eye landmarks are used as alignment
#    reference points. The code solves a Procrustes problem and applies the
#    resulting affine transform with OpenCV's warpAffine (other alignment
#    methods would also work).
def get_fea_points(rects, im):
    feas = []   # the 68 keypoints as (x, y) tuples

    # Only the first detected face is used (the images here contain one face).
    landmarks = numpy.matrix([[p.x, p.y] for p in predictor(im, rects[0]).parts()])
    for idx, point in enumerate(landmarks):
        pos = (point[0, 0], point[0, 1])   # save each keypoint in order
        feas.append(pos)
        # to visualize: cv2.circle(im, pos, 3, color=(0, 255, 0))

    return feas, landmarks

def transformation_from_points(points1, points2):
    """
    Return an affine transformation [s * R | T] such that:
        sum ||s*R*p1,i + T - p2,i||^2
    is minimized.
    """
    # Solve the procrustes problem by subtracting centroids, scaling by the
    # standard deviation, and then using the SVD to calculate the rotation. See
    # the following for more details:
    #   https://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem

    points1 = points1.astype(numpy.float64)
    points2 = points2.astype(numpy.float64)

    c1 = numpy.mean(points1, axis=0)
    c2 = numpy.mean(points2, axis=0)
    points1 -= c1
    points2 -= c2

    s1 = numpy.std(points1)
    s2 = numpy.std(points2)
    points1 /= s1
    points2 /= s2

    U, S, Vt = numpy.linalg.svd(points1.T * points2)

    # The R we seek is in fact the transpose of the one given by U * Vt. This
    # is because the above formulation assumes the matrix goes on the right
    # (with row vectors) where as our solution requires the matrix to be on the
    # left (with column vectors).
    R = (U * Vt).T

    return numpy.vstack([numpy.hstack(((s2 / s1) * R,
                                       c2.T - (s2 / s1) * R * c1.T)),
                         numpy.matrix([0., 0., 1.])])

def warp_im(im, M, dshape):
    output_im = numpy.zeros(dshape, dtype=im.dtype)
    cv2.warpAffine(im,
                   M[:2],
                   (dshape[1], dshape[0]),
                   dst=output_im,
                   borderMode=cv2.BORDER_TRANSPARENT,
                   flags=cv2.WARP_INVERSE_MAP)
    return output_im

def face_align(image, src_landmark, landmark):
    # Estimate the similarity transform that maps the reference landmarks
    # onto this face's landmarks, then warp the image accordingly.
    warp_mat = transformation_from_points(src_landmark, landmark)
    if warp_mat is not None:
        return warp_im(image, warp_mat, image.shape)
    return None

Building the Model

The model

Here we use a basic CNN.

[Figure: LeNet-5 architecture]

Our model differs from the figure only in its output layer: there are just two classes, male and female, so the output layer has two nodes (one per class, fed into a softmax).

Model code

import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import convolution2d, fully_connected, max_pool2d

# Target image size
IMAGE_HEIGHT = 227
IMAGE_WIDTH = 227
# Small graph that decodes one JPEG and resizes it, normalized to [0, 1]
jpg_data = tf.placeholder(dtype=tf.string)
decode_jpg = tf.image.decode_jpeg(jpg_data, channels=3)
resize = tf.image.resize_images(decode_jpg, [IMAGE_HEIGHT, IMAGE_WIDTH])
resize = tf.cast(resize, tf.float32) / 255.0   # cast to float before dividing, not uint8
def resize_image(file_name):
    with tf.gfile.FastGFile(file_name, 'rb') as f:   # 'rb': JPEG bytes, not text
        image_data = f.read()
    # Note: opening a new Session per image is slow; reusing a single
    # Session across calls would speed up batch preparation considerably.
    with tf.Session() as sess:
        image = sess.run(resize, feed_dict={jpg_data: image_data})
    return image

def get_next_batch(data_set, batch_size=128):
    global pointer
    batch_x = []
    batch_y = []
    for i in range(batch_size):
        batch_x.append(resize_image(data_set[pointer][0]))
        if data_set[pointer][1] == 0:   # '==', not 'is': identity checks on ints are unreliable
            batch_y.append([1, 0])
        else:
            batch_y.append([0, 1])
        pointer += 1

    batch_y = np.array(batch_y)   # the original assigned this to a misspelled 'branch_y' and discarded it
    return batch_x, batch_y

batch_size = 128
labels_size = 2   # two classes: male / female
num_batch = len(data_set) // batch_size
print("num_batch =", num_batch)
X = tf.placeholder(dtype=tf.float32, shape=[batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, 3])
Y = tf.placeholder(dtype=tf.float32, shape=[batch_size, labels_size])   # float labels for softmax_cross_entropy_with_logits
 
def conv_net(nlabels, images, pkeep=1.0):
    weights_regularizer = tf.contrib.layers.l2_regularizer(0.0005)
    with tf.variable_scope("conv_net", "conv_net", [images]) as scope:
        with tf.contrib.slim.arg_scope(
                [convolution2d, fully_connected],
                weights_regularizer=weights_regularizer,
                biases_initializer=tf.constant_initializer(1.),
                weights_initializer=tf.random_normal_initializer(stddev=0.005),
                trainable=True):
            with tf.contrib.slim.arg_scope(
                    [convolution2d],
                    weights_initializer=tf.random_normal_initializer(stddev=0.01)):
                conv1 = convolution2d(images, 96, [7,7], [4, 4], padding='VALID', biases_initializer=tf.constant_initializer(0.), scope='conv1')
                pool1 = max_pool2d(conv1, 3, 2, padding='VALID', scope='pool1')
                norm1 = tf.nn.local_response_normalization(pool1, 5, alpha=0.0001, beta=0.75, name='norm1')
                conv2 = convolution2d(norm1, 256, [5, 5], [1, 1], padding='SAME', scope='conv2') 
                pool2 = max_pool2d(conv2, 3, 2, padding='VALID', scope='pool2')
                norm2 = tf.nn.local_response_normalization(pool2, 5, alpha=0.0001, beta=0.75, name='norm2')
                conv3 = convolution2d(norm2, 384, [3, 3], [1, 1], biases_initializer=tf.constant_initializer(0.), padding='SAME', scope='conv3')
                pool3 = max_pool2d(conv3, 3, 2, padding='VALID', scope='pool3')
                flat = tf.reshape(pool3, [-1, 384*6*6], name='reshape')
                full1 = fully_connected(flat, 512, scope='full1')
                drop1 = tf.nn.dropout(full1, pkeep, name='drop1')
                full2 = fully_connected(drop1, 512, scope='full2')
                drop2 = tf.nn.dropout(full2, pkeep, name='drop2')
    with tf.variable_scope('output') as scope:
        weights = tf.Variable(tf.random_normal([512, nlabels], mean=0.0, stddev=0.01), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[nlabels], dtype=tf.float32), name='biases')
        output = tf.add(tf.matmul(drop2, weights), biases, name=scope.name)
    return output
 
def training():
    logits = conv_net(labels_size, X)
    def optimizer(eta, loss_fn):
        global_step = tf.Variable(0, trainable=False)
        optz = lambda lr: tf.train.MomentumOptimizer(lr, 0.9)
        lr_decay_fn = lambda lr,global_step : tf.train.exponential_decay(lr, global_step, 100, 0.97, staircase=True)
        return tf.contrib.layers.optimize_loss(loss_fn, global_step, eta, optz, clip_gradients=4., learning_rate_decay_fn=lr_decay_fn)
 
    def loss(logits, labels):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = cross_entropy_mean + 0.01 * sum(regularization_losses)
        loss_averages = tf.train.ExponentialMovingAverage(0.9)
        loss_averages_op = loss_averages.apply([cross_entropy_mean] + [total_loss])
        with tf.control_dependencies([loss_averages_op]):
            total_loss = tf.identity(total_loss)
        return total_loss
    # loss
    total_loss = loss(logits, Y)
    # optimizer
    train_op = optimizer(0.001, total_loss)
 
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        global pointer
        epoch = 0
        while True:
            #print("begin")
            pointer = 0
            for batch in range(num_batch):
                batch_x, batch_y = get_next_batch(data_set, batch_size)
                print("begin train")
                _, loss_value = sess.run([train_op, total_loss], feed_dict={X:batch_x, Y:batch_y})
                print(epoch, batch, loss_value)
            epoch += 1
            if epoch % 10 == 0:
                # AGE is a flag shared with the age-estimation variant of this script; here it is False
                saver.save(sess, 'save_path/age.module' if AGE else 'save_path/sex.module', global_step=epoch)
            if epoch % 20 == 0:
                saver.save(sess, "save_path/age.module")   # create the save_path directory beforehand, otherwise saving fails
            if epoch > 500:
                break
        
training()
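
The post ends at training, so here is a minimal inference sketch under the assumptions already made above (label 0 = female, a checkpoint written by training() at epoch 500; run it in a fresh process so the conv_net variable scope is not already populated):

def predict_gender(image_path, checkpoint='save_path/sex.module-500'):
    # Build a single-image copy of the graph and restore the trained weights.
    x = tf.placeholder(dtype=tf.float32, shape=[1, IMAGE_HEIGHT, IMAGE_WIDTH, 3])
    logits = conv_net(labels_size, x)
    probs = tf.nn.softmax(logits)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        p = sess.run(probs, feed_dict={x: [resize_image(image_path)]})
    return 'female' if p[0][0] > p[0][1] else 'male'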

Training Results

Training details

  • About 28,000 aligned images were used for training.
  • Training took roughly 24 hours, since many iterations were needed.
  • On a test set of 200 images, accuracy was about 70% (see the evaluation sketch below).
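
For reference, a hedged sketch of how such an accuracy figure can be computed, assuming a held-out test_set in the same (path, label) format and a restored session with the logits op and input placeholder from the inference sketch above:

def evaluate(test_set, sess, logits_op, x):
    # Fraction of test images whose argmax over the two logits matches the label.
    correct = 0
    for path, label in test_set:
        out = sess.run(logits_op, feed_dict={x: [resize_image(path)]})
        if int(np.argmax(out[0])) == label:
            correct += 1
    return correct / len(test_set)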

The overall result is not ideal, possibly for the following reasons:

  • The collected data is noisy: many images are blurry or visibly distorted.
  • The preprocessing may not be good enough; the face alignment may be inaccurate.
  • The model design may be inadequate.