LFW数据集->rec文件
- 数据集准备:
- raw data下载:(提供两种来源)
- 已经对齐好的bin文件:https://download.csdn.net/download/ustczhng2012/15002984
- 数据对齐
- 基于原图+landmark点对齐图片
- LFW直接下载的raw data没有landmark点,一般需要landmark模型生成关键点,然后旋转缩放对齐后再生成bin文件
import cv2
import numpy as np
from skimage import transform as trans
# Five 5-point landmark templates (five (x, y) points; by convention the
# two eyes, nose tip and two mouth corners -- TODO confirm ordering) laid
# out on a 112x112 canvas, one per head pose.  estimate_norm() tries each
# template and keeps the one with the lowest alignment error.
# NOTE(review): the pose markers below appear to label the FOLLOWING
# template (src3 is the symmetric one, matching "frontal") -- verify.
src1 = np.array([[51.642, 50.115], [57.617, 49.990], [35.740, 69.007],
                 [51.157, 89.050], [57.025, 89.702]],
                dtype=np.float32)
# <-- left
src2 = np.array([[45.031, 50.118], [65.568, 50.872], [39.677, 68.111],
                 [45.177, 86.190], [64.246, 86.758]],
                dtype=np.float32)
# --- frontal
src3 = np.array([[39.730, 51.138], [72.270, 51.138], [56.000, 68.493],
                 [42.463, 87.010], [69.537, 87.010]],
                dtype=np.float32)
# --> right
src4 = np.array([[46.845, 50.872], [67.382, 50.118], [72.737, 68.111],
                 [48.167, 86.758], [67.236, 86.190]],
                dtype=np.float32)
# --> right profile
src5 = np.array([[54.796, 49.990], [60.771, 50.115], [76.673, 69.007],
                 [55.388, 89.702], [61.257, 89.050]],
                dtype=np.float32)
# Stacked pose templates, shape (5, 5, 2); doubled for 224x224 crops.
src = np.array([src1, src2, src3, src4, src5])
src_map = {112: src, 224: src * 2}
# Single ArcFace reference template for a 112x112 crop.
arcface_src = np.array(
    [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
     [41.5493, 92.3655], [70.7299, 92.2041]],
    dtype=np.float32)
# Expand to (1, 5, 2) so estimate_norm() can iterate it like src_map entries.
arcface_src = np.expand_dims(arcface_src, axis=0)
# lmk is prediction; src is template
def estimate_norm(lmk, image_size=112, mode='arcface'):
    """Estimate the best similarity transform aligning lmk to a template.

    Args:
        lmk: (5, 2) array of predicted facial landmarks.
        image_size: target crop size; must be 112 in 'arcface' mode,
            otherwise a key of src_map (112 or 224).
        mode: 'arcface' uses the single ArcFace template; any other value
            tries all five pose templates and keeps the best fit.

    Returns:
        (M, index): the 2x3 affine matrix with the lowest landmark
        reprojection error, and the index of the winning template.
    """
    assert lmk.shape == (5, 2)
    tform = trans.SimilarityTransform()
    # Homogeneous coordinates: append a column of ones -> (5, 3).
    lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1)
    # Proper sentinels instead of empty lists (the originals were only
    # ever returned if the template set were empty, but were misleading).
    min_M = None
    min_index = -1
    min_error = float('inf')
    if mode == 'arcface':
        assert image_size == 112
        src = arcface_src
    else:
        src = src_map[image_size]
    for i, template in enumerate(src):
        tform.estimate(lmk, template)
        M = tform.params[0:2, :]
        # Reproject the landmarks and measure total Euclidean error.
        results = np.dot(M, lmk_tran.T).T
        error = np.sum(np.sqrt(np.sum((results - template) ** 2, axis=1)))
        if error < min_error:
            min_error = error
            min_M = M
            min_index = i
    return min_M, min_index
def norm_crop(img, landmark, image_size=112, mode='arcface'):
    """Warp img so its face landmarks match the canonical template,
    returning an image_size x image_size aligned crop."""
    matrix, _pose = estimate_norm(landmark, image_size, mode)
    return cv2.warpAffine(img, matrix, (image_size, image_size), borderValue=0.0)
- 生成rec和idx文件
- 可以参考emore.rec生成方法,使用该方法不需要上述数据对齐脚本,里面自带对齐脚本,只需要配置好图片配置txt文件即可
- 创建pair文件
- 创建pairs.txt脚本:(参考链接)
import glob
import os.path
import numpy as np
import os
def find_not_zero_pos(sstr):
    """Return the index of the first character of sstr that is not '0'.

    When every character is '0' (or sstr is empty) the fallback is
    len(sstr) - 1, so slicing from the result keeps exactly one '0'.
    """
    return next((pos for pos, ch in enumerate(sstr) if ch != "0"),
                len(sstr) - 1)
def get_real_str(sstr):
    """Strip leading zeros from sstr, keeping one character when the
    string is all zeros (e.g. '0003' -> '3', '0000' -> '0')."""
    trimmed = sstr.lstrip("0")
    return trimmed if trimmed else sstr[-1:]
def create_match_content():
    """Randomly draw same-person pairs from INPUT_DATA.

    Each entry is 'label idx1 idx2', where idx1/idx2 are the zero-stripped
    trailing image numbers of two distinct images of the same person.

    Returns:
        (matched_result, k): the set of pairs (up to 3000) and the number
        of successful draws (duplicates re-drawn into the set included).

    NOTE(review): the original looped until 3000 distinct pairs existed
    and would spin forever on a dataset that cannot supply that many;
    this version stops once a full pass adds nothing new.
    """
    matched_result = set()
    k = 0
    sub_dirs = [x[0] for x in os.walk(INPUT_DATA)]
    while len(matched_result) < 3000:
        size_before = len(matched_result)
        for sub_dir in sub_dirs[1:]:  # skip sub_dirs[0] == INPUT_DATA itself
            dir_name = os.path.basename(sub_dir)
            file_list = glob.glob(os.path.join(INPUT_DATA, dir_name, '*.jpg'))
            if len(file_list) < 2:
                continue  # need two distinct images of the same person
            length = len(file_list)
            random_number1 = np.random.randint(length)
            random_number2 = np.random.randint(length)
            while random_number1 == random_number2:
                random_number1 = np.random.randint(length)
                random_number2 = np.random.randint(length)
            # randint(length) is already < length: no modulo needed, and
            # distinct indices into a glob result are distinct files, so
            # the original path-inequality check was always true.
            base_name1 = os.path.basename(file_list[random_number1])
            base_name2 = os.path.basename(file_list[random_number2])
            # Image number = last '_'-separated token, extension removed.
            idx1 = get_real_str(base_name1.split("_")[-1].split(".")[0])
            idx2 = get_real_str(base_name2.split("_")[-1].split(".")[0])
            matched_result.add(dir_name + ' ' + idx1 + ' ' + idx2)
            k = k + 1
            if len(matched_result) >= 3000:
                break
        if len(matched_result) == size_before:
            break  # dataset exhausted; avoid an infinite loop
    return matched_result, k
def create_unmatch_content():
    """Randomly draw different-person pairs from INPUT_DATA.

    Each entry is 'name2 idx2 name1 idx1' for two distinct identities.

    Returns:
        (unmatched_result, k): the set of pairs (target ~3000) and the
        number of draw attempts on non-empty class pairs.
    """
    unmatched_result = set()
    k = 0
    # The directory listing is invariant: hoisted out of the retry loop
    # (the original re-walked the tree on every pass).
    sub_dirs = [x[0] for x in os.walk(INPUT_DATA)]
    length_of_dir = len(sub_dirs)
    while len(unmatched_result) < 3000:
        for j in range(24):
            for i in range(1, length_of_dir):
                class1 = sub_dirs[i]
                random_number = np.random.randint(length_of_dir)
                # BUG FIX: the original guard was
                #   while random_number == 0 | random_number == i:
                # which Python parses as the chained comparison
                #   (random_number == (0 | random_number)) and (... == i)
                # i.e. just 'random_number == i' -- index 0 (the dataset
                # root itself) was never actually excluded.
                while random_number == 0 or random_number == i:
                    random_number = np.random.randint(length_of_dir)
                class2 = sub_dirs[random_number]
                class1_name = os.path.basename(class1)
                class2_name = os.path.basename(class2)
                file_list1 = glob.glob(
                    os.path.join(INPUT_DATA, class1_name, '*.jpg'))
                file_list2 = glob.glob(
                    os.path.join(INPUT_DATA, class2_name, '*.jpg'))
                if not (file_list1 and file_list2):
                    continue
                # Image number = last '_'-separated token, no extension.
                base_name1 = os.path.basename(file_list1[j % len(file_list1)])
                idx1 = get_real_str(base_name1.split("_")[-1].split(".")[0])
                base_name2 = os.path.basename(file_list2[j % len(file_list2)])
                idx2 = get_real_str(base_name2.split("_")[-1].split(".")[0])
                s = class2_name + ' ' + idx2 + ' ' + class1_name + ' ' + idx1
                if s not in unmatched_result:
                    unmatched_result.add(s)
                if len(unmatched_result) > 3000:
                    break
                k = k + 1
    return unmatched_result, k
if __name__ == '__main__':
    INPUT_DATA = r'dataset/val/lfw-deepfunneled'
    txt_path = 'dataset/val/pairs.txt'
    # Start from a clean pairs file.
    if os.path.isfile(txt_path):
        os.remove(txt_path)
    result, k1 = create_match_content()
    print(k1)
    result_un, k2 = create_unmatch_content()
    print(k2)
    result1 = list(result)
    result2 = list(result_un)
    # LFW pairs.txt layout: header '10 300', then 10 folds of
    # 300 matched followed by 300 mismatched pairs.
    # FIX: use a context manager so the handle is closed even on error
    # (the original open()/close() pair leaked on exception).
    with open(txt_path, 'w') as pair_file:
        pair_file.write('10 300\n')
        for i in range(10):
            for pair in result1[i * 300:i * 300 + 300]:
                pair_file.write(pair + '\n')
            for pair in result2[i * 300:i * 300 + 300]:
                pair_file.write(pair + '\n')
- 生成验证集bin文件
- 注意:生成的bin文件中图像数据是BGR格式的,在使用时需要注意
import pickle
import os
from tqdm import tqdm
import cv2
def _resize_and_read(lfw_dir, person, number, image_size):
    """Resize one LFW image, save a '<stem>_resize.jpg' sibling, and
    return that file's raw JPEG bytes (BGR-encoded, per cv2.imwrite)."""
    stem = person + '_' + number.zfill(4)
    src_path = os.path.join(lfw_dir, person, stem + '.jpg')
    img = cv2.imread(src_path)
    if img is None:
        # Fail loudly on a missing/corrupt image instead of letting
        # cv2.resize raise a cryptic error on None.
        raise FileNotFoundError(src_path)
    img = cv2.resize(img, image_size)
    dst_path = os.path.join(lfw_dir, person, stem + '_resize.jpg')
    cv2.imwrite(dst_path, img)
    with open(dst_path, 'rb') as fin:
        return fin.read()


if __name__ == '__main__':
    lfw_dir = r'dataset/val/lfw-deepfunneled'
    image_size = (112, 112)
    pairs_path = r'dataset/val/pairs.txt'
    bin_output = r'dataset/val/lfw.bin'
    lfw_bins = []
    issame_list = []
    with open(pairs_path, 'r') as f:
        lines = f.readlines()
    # lines[0] is the '10 300' fold header; pair lines start at index 1.
    # The two branches below were ~40 duplicated lines in the original;
    # the shared logic now lives in _resize_and_read().
    for i in tqdm(range(1, len(lines))):
        temp = lines[i].strip().split(' ')
        if len(temp) == 3:
            # Same person: 'name idx1 idx2'
            lfw_bins.append(_resize_and_read(lfw_dir, temp[0], temp[1], image_size))
            lfw_bins.append(_resize_and_read(lfw_dir, temp[0], temp[2], image_size))
            issame_list.append(1)
        elif len(temp) == 4:
            # Different people: 'name1 idx1 name2 idx2'
            lfw_bins.append(_resize_and_read(lfw_dir, temp[0], temp[1], image_size))
            lfw_bins.append(_resize_and_read(lfw_dir, temp[2], temp[3], image_size))
            issame_list.append(0)
        else:
            print("drop this line: %d" % i)
            continue
    with open(bin_output, 'wb') as f:
        pickle.dump((lfw_bins, issame_list), f, protocol=pickle.HIGHEST_PROTOCOL)