Data Collection
Training Data – the Adience Dataset
The Adience dataset is sourced from Flickr albums, shot by users on iPhones and other smartphones. It is intended for age and gender estimation on unfiltered, in-the-wild faces, and it also ships with corresponding facial landmark annotations. It contains 2,284 subjects and 26,580 images.
Adience dataset download: http://www.openu.ac.il/home/hassner/Adience/data.html#agegender
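The training code later in this post expects a data_set of (image_path, label) pairs, which is never shown being built. Below is a rough sketch of how it could be assembled from the Adience fold files; the tab-separated column names and the aligned-image filename pattern are assumptions based on the dataset's published layout, and the 0 = male convention simply matches the one-hot encoding used in get_next_batch further down.

import os

ADIENCE_DIR = "Adience"              # assumed local extraction path
GENDER_TO_LABEL = {"m": 0, "f": 1}   # 0 = male, 1 = female (assumed convention)

def load_fold(fold_file):
    """Parse one fold_*_data.txt file into (image_path, label) pairs."""
    pairs = []
    with open(os.path.join(ADIENCE_DIR, fold_file)) as f:
        header = f.readline().strip().split("\t")
        for line in f:
            row = dict(zip(header, line.strip().split("\t")))
            if row.get("gender") not in GENDER_TO_LABEL:  # skip unlabeled faces
                continue
            path = os.path.join(
                ADIENCE_DIR, "aligned", row["user_id"],
                "landmark_aligned_face.%s.%s" % (row["face_id"], row["original_image"]))
            pairs.append((path, GENDER_TO_LABEL[row["gender"]]))
    return pairs

data_set = load_fold("fold_0_data.txt")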
Preprocessing
Since we have already collected many images containing faces, how should we preprocess them?
- Locate the face in each image
- Align the face so it is upright
Main implementation code
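The preprocessing snippets below rely on a few globals and helpers the original text does not show. A minimal sketch of them follows; the model file path, the eye-based ALIGN_POINTS choice, and the two helper functions are assumptions consistent with how they are used.

import os
import cv2
import dlib
import numpy

detector = dlib.get_frontal_face_detector()
# assumed path to dlib's standard 68-landmark model
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

# In dlib's 68-point layout, indices 36-41 are one eye and 42-47 the other;
# using both eyes as alignment references matches the description in step 2.
ALIGN_POINTS = list(range(36, 48))

# src_landmark holds the landmarks of a reference (frontal) face that every
# image is aligned to; it can be computed once from a template image, e.g.:
#   ref_img = cv2.imread("reference_face.jpg")      # hypothetical template
#   _, src_landmark = get_fea_points(detector(ref_img, 1), ref_img)

def get_left_right_top_bottom(rects):
    # hypothetical helper: bounding box of the first detected face
    d = rects[0]
    return d.left(), d.top(), d.right(), d.bottom()

def save_roi_image(im, left, right, top, bottom, img_name):
    # hypothetical helper: crop the face region and save it alongside the original
    cv2.imwrite("aligned_" + os.path.basename(img_name), im[top:bottom, left:right])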
# 1. Use dlib to locate the face and its 68 facial landmarks
for img_name in data_set:
    im = cv2.imread(img_name)
    # cv2.imread returns 3-channel BGR by default; only strip alpha if present
    image = cv2.cvtColor(im, cv2.COLOR_BGRA2BGR) if im.shape[2] == 4 else im
    rects = detector(image, 1)  # detect faces
    if len(rects) >= 1:  # at least one face found
        feats, landmark = get_fea_points(rects, image)
        rotImg = face_align(image, src_landmark[ALIGN_POINTS], landmark[ALIGN_POINTS])
        if rotImg is not None:
            newrects = detector(rotImg, 1)  # re-detect on the aligned image
            if len(newrects) >= 1:
                left, top, right, bottom = get_left_right_top_bottom(newrects)
                save_roi_image(rotImg, left, right, top, bottom, img_name)
            else:
                print("align not found face")
    else:
        print("not found face")
# 2. Align the face: if it is tilted, rotate it upright. Step 1 already gave us the 68 landmarks; here we use only the eye points as alignment references and solve for a similarity (scale + rotation + translation) transform, applied with OpenCV's warpAffine. Other approaches work as well.
def get_fea_points(rects, im):
    global landmarks
    feas = []  # landmark points as (x, y) tuples
    for i in range(len(rects)):  # iterate over detected faces
        landmarks = numpy.matrix([[p.x, p.y] for p in predictor(im, rects[i]).parts()])
        print("face num = ", i)
        print(len(landmarks))
        for idx, point in enumerate(landmarks):
            pos = (point[0, 0], point[0, 1])  # save each landmark in order
            feas.append(pos)
            # optionally visualize: cv2.circle(im, pos, 3, color=(0, 255, 0))
        break  # only the first detected face is used
    return feas, landmarks
def transformation_from_points(points1, points2):
    """
    Return an affine transformation [s * R | T] such that:
        sum ||s*R*p1,i + T - p2,i||^2
    is minimized.
    """
    # Solve the procrustes problem by subtracting centroids, scaling by the
    # standard deviation, and then using the SVD to calculate the rotation. See
    # the following for more details:
    # https://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem
    points1 = points1.astype(numpy.float64)
    points2 = points2.astype(numpy.float64)

    c1 = numpy.mean(points1, axis=0)
    c2 = numpy.mean(points2, axis=0)
    points1 -= c1
    points2 -= c2

    s1 = numpy.std(points1)
    s2 = numpy.std(points2)
    points1 /= s1
    points2 /= s2

    U, S, Vt = numpy.linalg.svd(points1.T * points2)

    # The R we seek is in fact the transpose of the one given by U * Vt. This
    # is because the above formulation assumes the matrix goes on the right
    # (with row vectors) where as our solution requires the matrix to be on the
    # left (with column vectors).
    R = (U * Vt).T

    return numpy.vstack([numpy.hstack(((s2 / s1) * R,
                                       c2.T - (s2 / s1) * R * c1.T)),
                         numpy.matrix([0., 0., 1.])])
def warp_im(im, M, dshape):
    output_im = numpy.zeros(dshape, dtype=im.dtype)
    cv2.warpAffine(im,
                   M[:2],
                   (dshape[1], dshape[0]),
                   dst=output_im,
                   borderMode=cv2.BORDER_TRANSPARENT,
                   flags=cv2.WARP_INVERSE_MAP)
    return output_im
def face_align(image, src_landmark, landmark):
    # solve for the transform that maps the reference landmarks onto this face
    warp_mat = transformation_from_points(src_landmark, landmark)
    if warp_mat is not None:
        rotImg = warp_im(image, warp_mat, image.shape)
        return rotImg
    else:
        return None
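As a quick sanity check of transformation_from_points (illustrative only, not part of the original pipeline), we can recover a known similarity transform from synthetic points:

# Recover a known scale/rotation/translation from synthetic 2-D points.
theta = numpy.pi / 6
R_true = numpy.matrix([[numpy.cos(theta), -numpy.sin(theta)],
                       [numpy.sin(theta),  numpy.cos(theta)]])
p1 = numpy.matrix(numpy.random.randn(10, 2))
p2 = p1 * (2.0 * R_true).T + numpy.matrix([[3.0, -1.0]])  # scale 2, rotate 30 deg, translate

M = transformation_from_points(p1, p2)
print(M[:2, :2])  # should be close to 2.0 * R_true
print(M[:2, 2])   # should be close to [3, -1]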
Building the Model
Model
Here we use a basic CNN model.
Our model differs from the one shown in the figure only in the output layer: we have just two classes, male and female, so the output layer has two nodes, one per class.
Model code
# imports used by the model code below
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import convolution2d, fully_connected, max_pool2d

# target image size
IMAGE_HEIGHT = 227
IMAGE_WIDTH = 227

# graph ops that decode and resize an image
jpg_data = tf.placeholder(dtype=tf.string)
decode_jpg = tf.image.decode_jpeg(jpg_data, channels=3)
resize = tf.image.resize_images(decode_jpg, [IMAGE_HEIGHT, IMAGE_WIDTH])
resize = tf.cast(resize, tf.float32) / 255.0  # scale pixels to [0, 1]

def resize_image(file_name):
    # note: opening a new Session per image is simple but slow; reuse one
    # Session if preprocessing throughput matters
    with tf.gfile.FastGFile(file_name, 'rb') as f:  # 'rb': image files are binary
        image_data = f.read()
    with tf.Session() as sess:
        image = sess.run(resize, feed_dict={jpg_data: image_data})
    return image
pointer = 0  # index of the next sample in data_set

def get_next_batch(data_set, batch_size=128):
    global pointer
    batch_x = []
    batch_y = []
    for i in range(batch_size):
        batch_x.append(resize_image(data_set[pointer][0]))
        # one-hot encode the gender label ('==', not 'is': identity checks on ints are unreliable)
        if data_set[pointer][1] == 0:
            batch_y.append([1, 0])
        else:
            batch_y.append([0, 1])
        pointer += 1
    batch_y = np.array(batch_y)
    return batch_x, batch_y
batch_size = 128
labels_size = 2  # two classes: male / female
AGE = False      # assumed flag: this post trains the gender model; set True for age
num_batch = len(data_set) // batch_size
print("num_batch =", num_batch)

X = tf.placeholder(dtype=tf.float32, shape=[batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, 3])
# labels must be float for softmax_cross_entropy_with_logits below
Y = tf.placeholder(dtype=tf.float32, shape=[batch_size, labels_size])
def conv_net(nlabels, images, pkeep=1.0):
    weights_regularizer = tf.contrib.layers.l2_regularizer(0.0005)
    with tf.variable_scope("conv_net", "conv_net", [images]) as scope:
        with tf.contrib.slim.arg_scope([convolution2d, fully_connected],
                                       weights_regularizer=weights_regularizer,
                                       biases_initializer=tf.constant_initializer(1.),
                                       weights_initializer=tf.random_normal_initializer(stddev=0.005),
                                       trainable=True):
            with tf.contrib.slim.arg_scope([convolution2d],
                                           weights_initializer=tf.random_normal_initializer(stddev=0.01)):
                # 227x227x3 -> 56x56x96
                conv1 = convolution2d(images, 96, [7, 7], [4, 4], padding='VALID',
                                      biases_initializer=tf.constant_initializer(0.), scope='conv1')
                # -> 27x27x96
                pool1 = max_pool2d(conv1, 3, 2, padding='VALID', scope='pool1')
                norm1 = tf.nn.local_response_normalization(pool1, 5, alpha=0.0001, beta=0.75, name='norm1')
                # -> 27x27x256
                conv2 = convolution2d(norm1, 256, [5, 5], [1, 1], padding='SAME', scope='conv2')
                # -> 13x13x256
                pool2 = max_pool2d(conv2, 3, 2, padding='VALID', scope='pool2')
                norm2 = tf.nn.local_response_normalization(pool2, 5, alpha=0.0001, beta=0.75, name='norm2')
                # -> 13x13x384
                conv3 = convolution2d(norm2, 384, [3, 3], [1, 1],
                                      biases_initializer=tf.constant_initializer(0.), padding='SAME', scope='conv3')
                # -> 6x6x384, which explains the 384*6*6 reshape below
                pool3 = max_pool2d(conv3, 3, 2, padding='VALID', scope='pool3')
                flat = tf.reshape(pool3, [-1, 384 * 6 * 6], name='reshape')
                full1 = fully_connected(flat, 512, scope='full1')
                drop1 = tf.nn.dropout(full1, pkeep, name='drop1')
                full2 = fully_connected(drop1, 512, scope='full2')
                drop2 = tf.nn.dropout(full2, pkeep, name='drop2')
    with tf.variable_scope('output') as scope:
        weights = tf.Variable(tf.random_normal([512, nlabels], mean=0.0, stddev=0.01), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[nlabels], dtype=tf.float32), name='biases')
        output = tf.add(tf.matmul(drop2, weights), biases, name=scope.name)
    return output
def training():
    logits = conv_net(labels_size, X)

    def optimizer(eta, loss_fn):
        global_step = tf.Variable(0, trainable=False)
        optz = lambda lr: tf.train.MomentumOptimizer(lr, 0.9)
        lr_decay_fn = lambda lr, global_step: tf.train.exponential_decay(lr, global_step, 100, 0.97, staircase=True)
        return tf.contrib.layers.optimize_loss(loss_fn, global_step, eta, optz,
                                               clip_gradients=4., learning_rate_decay_fn=lr_decay_fn)

    def loss(logits, labels):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = cross_entropy_mean + 0.01 * sum(regularization_losses)
        # keep moving averages of the losses for smoother logging
        loss_averages = tf.train.ExponentialMovingAverage(0.9)
        loss_averages_op = loss_averages.apply([cross_entropy_mean] + [total_loss])
        with tf.control_dependencies([loss_averages_op]):
            total_loss = tf.identity(total_loss)
        return total_loss

    # loss
    total_loss = loss(logits, Y)
    # optimizer
    train_op = optimizer(0.001, total_loss)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        global pointer
        epoch = 0
        while True:
            pointer = 0
            for batch in range(num_batch):
                batch_x, batch_y = get_next_batch(data_set, batch_size)
                _, loss_value = sess.run([train_op, total_loss], feed_dict={X: batch_x, Y: batch_y})
                print(epoch, batch, loss_value)
            epoch += 1
            if epoch % 10 == 0:
                # create the save_path directory beforehand, otherwise saving fails
                saver.save(sess, 'save_path/age.module' if AGE else 'save_path/sex.module', global_step=epoch)
            if epoch > 500:
                break

training()
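The original does not show how the test accuracy quoted below was measured. Here is a hedged sketch of one way to do it; the held-out test_set (same (image_path, label) layout as data_set) is an assumption, and it should run in a fresh process so the variable scopes do not collide with the training graph.

# Hypothetical evaluation sketch: restore the latest checkpoint and count
# argmax matches over a held-out test_set of (image_path, label) pairs.
def evaluate(test_set):
    x = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT, IMAGE_WIDTH, 3])
    logits = conv_net(labels_size, x)  # pkeep defaults to 1.0: no dropout at test time
    pred = tf.argmax(logits, 1)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint('save_path'))
        correct = 0
        for path, label in test_set:
            p = sess.run(pred, feed_dict={x: [resize_image(path)]})[0]
            correct += int(p == label)
    print("accuracy =", correct / len(test_set))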
Training Results
Training details
- Roughly 28,000 aligned images were used for training.
- Training took about 24 hours because of the large number of iterations.
- Tested on 200 images, accuracy was around 70%.
The overall result is not ideal, possibly for the following reasons:
- The collected data is too noisy: many images are blurry or deformed.
- The preprocessing may not be good enough; face alignment may be inaccurate.
- The model design may be insufficient.