1、对剪切、对齐好的图片生成 lst 文件:mxnet 源码中的 im2rec.py 生成的 lst 是乱序的,这里自己写脚本生成一个按类别顺序排列的 lst。
生成的 lst 每行有三列,以制表符分隔:第一列不使用(固定写 0),第二列是类别标签,第三列是图片的相对路径。例如:
0 0 aa/3275_18.jpg
0 0 aa/3275_25.jpg
0 0 aa/3275_4.jpg
…
0 3788 a2/314_18.jpg
0 3788 a2/314_7.jpg
0 3788 a2/314_20.jpg
from __future__ import print_function
import os
import sys
curr_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(curr_path, "../python"))
import mxnet as mx
import random
import argparse
import cv2
import time
import traceback
def get_lst(root="/home/align/", lst_path="train.lst"):
    """Write an ordered, tab-separated .lst file for a folder-per-identity tree.

    Every sub-directory of ``root`` is treated as one identity. Each image
    inside it produces one line ``0<TAB>label<TAB>folder/image``, where
    ``label`` is the zero-based index of the folder, so all images of one
    identity are contiguous and share a label.

    Args:
        root: Directory whose sub-directories each hold the aligned images
            of one identity.
        lst_path: Path of the list file to create (overwritten if present).
    """
    with open(lst_path, "w") as lst_file:
        label = 0
        # os.listdir() returns entries in arbitrary order; sort so that the
        # label assignment and the line order are reproducible across runs.
        for identity in sorted(os.listdir(root)):
            identity_dir = os.path.join(root, identity)
            # Stray files next to the identity folders cannot be listed and
            # must not consume a label.
            if not os.path.isdir(identity_dir):
                continue
            for img in sorted(os.listdir(identity_dir)):
                # Skip entries that vanished or are dangling symlinks.
                if not os.path.exists(os.path.join(identity_dir, img)):
                    continue
                # The first column is a placeholder that the consumer ignores.
                lst_file.write("0\t%d\t%s/%s\n" % (label, identity, img))
            label += 1
# Run the generator right away; with no arguments it writes train.lst
# relative to the current working directory.
get_lst()
2、用作者的代码,face2rec2.py,得到rec数据,代码修改参考
https://github.com/deepinsight/insightface/issues/265
执行 python face2rec2.py /home/train
(lst路径)也是生成数据的路径
face2rec2.py 修改如下
def read_list(path_in):
    """Yield record items for every usable line of the .lst file ``path_in``.

    Three groups of items are produced, in this order:

    1. One item with ``flag == 0`` per accepted line. Its ``image_path`` is
       the listed path joined onto the hard-coded image root, ``label`` is
       the parsed label (the commented-out upstream form stored
       ``[label, item.aligned]``), and ``id`` counts up from 1.
    2. One item with ``flag == 2`` and ``id == 0`` whose ``label`` is
       ``[float(next_id), float(next_id + number_of_ranges)]``.
    3. One item with ``flag == 2`` per run of consecutive equal labels,
       whose ``label`` is ``[float(first_id), float(end_id)]`` of that run.

    Lines whose image file does not exist are skipped silently. Lines that
    are neither aligned nor carry a landmark are skipped after a debug
    print.
    """
    # Image root added locally: paths in the .lst file are relative to it.
    path = "/home/shiyy/nas/data/yidongface/aug_112_112_yidong_align"
    with open(path_in) as fin:
        identities = []   # (first_id, end_id) for each finished label run
        prev_label = -1   # label of the run currently being read
        run_start = -1    # id of the first item in that run
        _id = 1
        for line in fin:
            item = edict()
            item.flag = 0
            parsed = face_preprocess.parse_lst_line(line)
            item.image_path, label, item.bbox, item.landmark, item.aligned = parsed
            item.image_path = os.path.join(path, item.image_path)
            if not os.path.exists(item.image_path):
                continue
            if not (item.aligned or item.landmark is not None):
                print("item.aligned,item.landmark", item.aligned, item.landmark)
                continue
            item.id = _id
            # Local modification: store the bare label instead of a list.
            item.label = label
            yield item
            if label != prev_label:
                # A new label begins here; close the previous run, if any.
                if run_start >= 0:
                    identities.append((run_start, _id))
                prev_label = label
                run_start = _id
            _id += 1
        # Close the last run (appended even when no line was accepted).
        identities.append((run_start, _id))
        header = edict()
        header.flag = 2
        header.id = 0
        header.label = [float(_id), float(_id + len(identities))]
        yield header
        # The range items take consecutive ids right after the image items.
        for range_id, (first_id, end_id) in enumerate(identities, _id):
            entry = edict()
            entry.flag = 2
            entry.id = range_id
            entry.label = [float(first_id), float(end_id)]
            yield entry
调用的程序 face_preprocess.py修改
def parse_lst_line(line):
    """Parse one tab-separated .lst line.

    Column layout as modified here: column 0 is ignored, column 1 is the
    label, column 2 is the image path. Optional columns 3-6 hold an integer
    bounding box and optional columns 7-16 hold ten landmark values.

    Args:
        line: One line of the .lst file, with or without trailing newline.

    Returns:
        Tuple ``(image_path, label, bbox, landmark, aligned)``. ``bbox`` is
        an ``int32`` array of shape ``(4,)`` or ``None``. ``landmark`` is a
        float array of shape ``(5, 2)`` or ``None``. ``aligned`` is always
        ``True`` because the flag column is no longer read.

    Raises:
        AssertionError: The line has fewer than three columns.
        IndexError: Bbox or landmark columns are only partially present.
        ValueError: A numeric column cannot be converted.
    """
    vec = line.strip().split("\t")
    assert len(vec) >= 3
    aligned = True  # upstream read int(vec[0]); inputs here are pre-aligned
    image_path = vec[2]
    label = int(float(vec[1]))  # accepts labels written as "12" or "12.0"
    bbox = None
    landmark = None
    if len(vec) > 3:
        # range() instead of the Python-2-only xrange(); indexing (not
        # slicing) keeps the IndexError on truncated lines.
        bbox = np.array([int(vec[i]) for i in range(3, 7)], dtype=np.int32)
        if len(vec) > 7:
            values = [float(vec[i]) for i in range(7, 17)]
            # Ten values are laid out as two rows of five, then transposed
            # to five (row, column) pairs.
            landmark = np.array(values).reshape((2, 5)).T
    return image_path, label, bbox, landmark, aligned
3、合并merge,两个数据集,自己生成的数据和作者的数据合并
用作者的 src/data/dataset_merge.py
直接合并,param1 设置为 0.0
python dataset_merge.py
--include /home/shiyy/nas/all_workspace/insightface/datasets/faces_emore,/home/train
--output /new_data
过滤数据集合并
insightface, issue,中写法
@test4fest
Sorry, I am late to answer. I just see your problem.
When I run the dataset_merge.py, I use:
python dataset_merge.py --include /mnt/sdf_6t/cdt/datasets/DeepGlint,/mnt/sdf_6t/cdt/datasets/megaface --output /mnt/sdf_6t/cdt/datasets/rec/merge_5 --model ./model-r50-am-lfw/model,0 --param1 0.7 --gpu 0,1,2,3 --batch-size 256
In `--model ./model-r50-am-lfw/model,0`, the `0` means that the model file is model-0000.params. `--param1 0.7` means that if the inner product of the mean features of two IDs is greater than 0.7, one of the two IDs is left out of the output dataset.
If you use "LResNet50E-IR, 50", you may need to
change it to "../model/LResNet50E-IR/model,50". If the model file is larger than 200M–300M (very likely, because it includes the last FC layer), you should use /deploy/model_slim.py to truncate the FC layer of the model.
‘’’