Part 1: Creating the LMDB data
Organize the data in the VOC format. Create the following directories under the project root:
Annotations: holds the generated xml files.
JPEGImages: holds all of the images.
labels: holds all of the annotation txt files.
ImageSets/Main: create the ImageSets directory with a Main directory inside it.
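To set this layout up quickly, a minimal sketch (run from the project root; directory names as above):

import os

# Create the VOC-style directory tree; makedirs also creates the
# intermediate ImageSets directory needed for ImageSets/Main.
for d in ["Annotations", "JPEGImages", "labels", "ImageSets/Main"]:
    if not os.path.exists(d):
        os.makedirs(d)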
Because the images and the annotation txt files contain some errors, both need some preprocessing before the LMDB can be built.
1. If the data is uploaded from Windows to Ubuntu with WinSCP, configure WinSCP to use UTF-8 for the transfer; otherwise the image and txt file names arrive garbled on Ubuntu. A quick check for garbled names is sketched below.
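One quick way to spot such garbled names after the transfer, a sketch assuming the correct file names in this data set are plain ASCII (adjust if your data legitimately uses non-ASCII names):

import os

# Print any file name containing non-ASCII characters; such names are
# likely mojibake from a wrong transfer encoding.
for d in ["JPEGImages", "labels"]:
    for name in os.listdir(d):
        if any(ord(ch) > 127 for ch in name):
            print(os.path.join(d, name))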
2. Some jpg files have the upper-case extension ".JPG", which needs to be changed to the lower-case ".jpg". Script:
import os

jpgs_dir = "./JPEGImages"
for jpg_name in os.listdir(jpgs_dir):
    portion = os.path.splitext(jpg_name)
    if portion[1] == ".JPG":
        new_name = portion[0] + ".jpg"
        # os.rename needs the path plus the file name, not just the name,
        # otherwise the script fails.
        jpg_name = os.path.join(jpgs_dir, jpg_name)
        new_name = os.path.join(jpgs_dir, new_name)
        print(jpg_name)
        print(new_name)
        os.rename(jpg_name, new_name)
3. Some jpg and txt file names contain spaces, which have to go; the script below replaces them with underscores (it handles the txt names as well as the image names):
import os

image_dir = "./delete_images"
txt_dir = "./delete_labels"

def delete_space(buff_dir):
    for files_name in os.listdir(buff_dir):
        if len(files_name.split(" ")) > 1:
            os.rename(os.path.join(buff_dir, files_name),
                      os.path.join(buff_dir, files_name.replace(" ", "_")))
            print(os.path.join(buff_dir, files_name.replace(" ", "_")))

delete_space(image_dir)
delete_space(txt_dir)
4. The annotation txt files may come in two different formats.
Format one:
1 1023 93 1079 137
1 1033 21 1077 59
1 1036 499 1234 645
0 1047 112 1071 164
1 1069 67 1117 105
Format two:
biker,1463,404,126,69
pedestrian,1514,375,14,35
pedestrian,1543,385,14,36
The two formats differ in three main ways:
(1) The class labels differ: one uses numeric ids, the other English words.
(2) The separators differ: one uses spaces, the other commas.
(3) The box encodings differ: one is x1 y1 x2 y2 (two corners), the other is x,y,w,h (top-left corner plus width and height). For example, the format-one line 1 1069 67 1117 105 becomes vehicle,1069,67,48,38.
The script below converts the first format into the second:
import os

rename_flg = 0
change_num = 0
dirpath = "labels/label4"
#dirpath = "test_delete"
for txt_name in os.listdir(dirpath):  # iterate over every file in the directory
    with open(os.path.join(dirpath, txt_name), 'r') as f, open(os.path.join(dirpath, "bak%s" % txt_name), 'w') as f_new:
        print(os.path.join(dirpath, "bak%s" % txt_name))
        for line in f.readlines():
            if 1 == len(line.split(" ")):  # no spaces: already comma separated (format two), nothing to change
                rename_flg = 0
                break
            elif len(line.split(" ")) > 1:  # space separated (format one); only these files are converted
                rename_flg = 1
                if '0' == line.split(" ")[0]:
                    line1 = 'pedestrian' + ',' + line.split(" ")[1] + ',' + line.split(" ")[2] + ','
                    line = line1 + str(int(line.split(" ")[3]) - int(line.split(" ")[1])) + ',' + str(int(line.split(" ")[4]) - int(line.split(" ")[2]))
                    #line = line.replace(line.split(" ")[0], 'pedestrian')  # don't use replace: it would also turn the digits 0/1/2 inside the coordinates into class words
                elif '1' == line.split(" ")[0]:
                    line1 = 'vehicle' + ',' + line.split(" ")[1] + ',' + line.split(" ")[2] + ','
                    line = line1 + str(int(line.split(" ")[3]) - int(line.split(" ")[1])) + ',' + str(int(line.split(" ")[4]) - int(line.split(" ")[2]))
                elif '2' == line.split(" ")[0]:
                    line1 = 'biker' + ',' + line.split(" ")[1] + ',' + line.split(" ")[2] + ','
                    line = line1 + str(int(line.split(" ")[3]) - int(line.split(" ")[1])) + ',' + str(int(line.split(" ")[4]) - int(line.split(" ")[2]))
                f_new.write(line)
                f_new.write("\r\n")
    if rename_flg == 1:  # without this check, files that needed no conversion would be overwritten too
        change_num = change_num + 1  # count how many files were converted
        os.remove(os.path.join(dirpath, txt_name))
        os.rename(os.path.join(dirpath, "bak%s" % txt_name), os.path.join(dirpath, txt_name))
    elif rename_flg == 0:
        os.remove(os.path.join(dirpath, "bak%s" % txt_name))
print('change_num:', change_num)
5. Some txt files carry only two coordinate values instead of four; delete such files outright with this script:
"""
有些标注的txt文件里面是错误的,例如目标后面的坐标值本来应该是pedestrian,1138,306,18,56
但是它后面的坐标只有两个,pedestrian,1138,306这样在后面进行txt to xml转换的时候会发生错误,
因此编写脚本把这种错误的txt找出来,删掉。
"""
import os
delete_labels = []
labels_dir = "./labels"
#labels_dir = "./delete_labels"
for label in os.listdir(labels_dir):
with open(os.path.join(labels_dir, label), 'r') as f:
for line in f.readlines():
if 5 != len(line.split(",")):#说明坐标是少的,这种要删除,
print(label)
delete_labels.append(label)
for label in delete_labels:
os.remove(os.path.join(labels_dir, label))
6. The detector has only the classes biker, pedestrian, and vehicle, yet building the LMDB failed with an unknown name "face": some annotation txt files turned out to contain a face class. So check with a script whether the txt files really contain only those three classes, and delete the files that carry any extra class.
First, the script that lists the classes that occur, to verify only the three expected ones are present:
import os
import numpy as np

txt_dir = "./labels"
buff_list = []
buff_dir = txt_dir
for files_name in os.listdir(buff_dir):
    with open(os.path.join(buff_dir, files_name), 'r') as f:
        for line in f.readlines():
            buff_list.append(line.split(",")[0])
print(np.unique(buff_list))
If extra classes do show up, delete the offending txt files with the script below:
import os

labels_dir = "./labels"
for label_name in os.listdir(labels_dir):
    with open(os.path.join(labels_dir, label_name), 'r') as f:
        for line in f.readlines():
            if "face" == line.split(",")[0]:
                print(label_name)
                os.remove(os.path.join(labels_dir, label_name))
                break  # stop after the first hit so the file is not removed twice
7. The txt files and jpg images must match one to one, so delete the unmatched files on either side with the script below:
"""
由于标注时有些错误,导致图片有几张多余的或者txt文件有几张多余的,
因此要删除多余的文件,保证每一张jpg对应一个txt文件。
"""
import os
images_dir = "./JPEGImages"
labels_dir = "./labels"
#删除多余的image,
labels = []
for label in os.listdir(labels_dir):
#labels.append(label.split('.')[0])#不能用这一行,因为有些文件名字前面就有 . 这样得到的文件名字是不对的。
labels.append(os.path.splitext(label)[0])
#print(labels)
for image_name in os.listdir(images_dir):
#image_name = image_name.split('.')[0] #不能用这一行,因为有些文件名字前面就有 .
image_name = os.path.splitext(image_name)[0]
#print(image_name)
if image_name not in labels:
image_name = image_name + ".jpg"
print(image_name)
#os.remove(os.path.join(images_dir, image_name))#删除图片,最开始先把这一行注释掉,运行下看看打印,以免误删导致数据还是重新做,
#删除多余的label
images = []
for image in os.listdir(images_dir):
#images.append(image.split('.')[0])#不能用这一行,因为有些文件名字前面就有 .
images.append(os.path.splitext(image)[0] )
for label_name in os.listdir(labels_dir):
#label_name = label_name.split('.')[0]#不能用这一行,因为有些文件名字前面就有 .
label_name = os.path.splitext(label_name)[0]
if label_name not in images:
label_name = label_name + ".txt"
print(label_name)
#os.remove(os.path.join(labels_dir, label_name))#删除label,最开始先把这一行注释掉,运行下看看打印,以免误删导致数据还是重新做,
8. Collect all image names and write them, as absolute paths, into a txt file:
import os

image_dir = "./JPEGImages"
with open("all_image_name.txt", 'w') as f:
    for image_name in os.listdir(image_dir):
        f.write(os.path.abspath(os.path.join(image_dir, image_name)) + "\n")
9. Generate the train, validation, and test split lists:
import os
import random

f_train = open("./ImageSets/Main/train.txt", 'w')        # training
f_val = open("./ImageSets/Main/val.txt", 'w')            # validation
f_trainval = open("./ImageSets/Main/trainval.txt", 'w')  # training plus validation
f_test = open("./ImageSets/Main/test.txt", 'w')          # test
for image_name in os.listdir("./JPEGImages"):
    image_name = image_name.split(".jpg")[0]
    feed = random.randint(0, 10)  # 11 buckets: roughly 9/11 train, 1/11 val, 1/11 test
    if feed <= 8:
        f_train.write(image_name + "\n")
        f_trainval.write(image_name + "\n")
    if feed == 9:
        f_val.write(image_name + "\n")
    if feed == 10:
        f_test.write(image_name + "\n")
f_train.close()
f_val.close()
f_trainval.close()
f_test.close()
10. Convert the txt annotations into xml files:
import os
import cv2

out0 = '''<?xml version="1.0" encoding="utf-8"?>
<annotation>
    <folder>None</folder>
    <filename>%(name)s</filename>
    <source>
        <database>None</database>
        <annotation>None</annotation>
        <image>None</image>
        <flickrid>None</flickrid>
    </source>
    <owner>
        <flickrid>None</flickrid>
        <name>None</name>
    </owner>
    <segmented>0</segmented>
    <size>
        <width>%(width)d</width>
        <height>%(height)d</height>
        <depth>3</depth>
    </size>
'''
out1 = '''    <object>
        <name>%(class)s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>%(xmin)d</xmin>
            <ymin>%(ymin)d</ymin>
            <xmax>%(xmax)d</xmax>
            <ymax>%(ymax)d</ymax>
        </bndbox>
    </object>
'''
out2 = '''</annotation>
'''

def translate(lists):
    source = {}
    label = {}
    for jpg in lists:
        if os.path.splitext(jpg)[1] == '.jpg':
            print(jpg)
            image = cv2.imread(jpg)
            h, w, _ = image.shape
            fxml = jpg.replace('JPEGImages', 'Annotations')
            fxml = fxml.replace('.jpg', '.xml')
            fxml = open(fxml, 'w')
            imgfile = jpg.split('/')[-1]
            source['name'] = imgfile
            source['width'] = w
            source['height'] = h
            fxml.write(out0 % source)

            txt = jpg.replace('.jpg', '.txt')
            txt = txt.replace('JPEGImages', 'labels')
            print(txt)
            with open(txt, 'r') as f:
                lines = [i.replace('\n', '') for i in f.readlines()]
            for box in lines:
                box = box.split(',')
                label['class'] = box[0]
                # The txt stores absolute pixel coordinates; if your labels
                # were normalized, multiply by w/h here instead.
                _x = int(float(box[1]))
                _y = int(float(box[2]))
                _w = int(float(box[3]))
                _h = int(float(box[4]))
                # Clamp the box to the image bounds.
                label['xmin'] = max(_x, 0)
                label['ymin'] = max(_y, 0)
                label['xmax'] = min(int(_x + _w), w - 1)
                label['ymax'] = min(int(_y + _h), h - 1)
                fxml.write(out1 % label)
            fxml.write(out2)
            fxml.close()  # close explicitly so every annotation is flushed to disk

if __name__ == '__main__':
    with open('all_image_name.txt', 'r') as f:
        lines = [i.replace('\n', '') for i in f.readlines()]
    translate(lines)
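As a worked example: for the format-two line pedestrian,1514,375,14,35 (and an image large enough that no clamping applies), the script writes this object block, with xmax = 1514 + 14 and ymax = 375 + 35:

    <object>
        <name>pedestrian</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>1514</xmin>
            <ymin>375</ymin>
            <xmax>1528</xmax>
            <ymax>410</ymax>
        </bndbox>
    </object>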
11. Generate the trainval.txt and val.txt needed for building the LMDB (note: these two files are different from the ones generated earlier under ImageSets/Main). Each output line pairs an image with its annotation, e.g. JPEGImages/xxx.jpg Annotations/xxx.xml. Script:
import os

f = open("./trainval.txt", 'w')
new_lines = ""
with open("./ImageSets/Main/trainval.txt", 'r') as ff:
    lines = ff.readlines()
    for line in lines:
        image_name = line.split("\n")[0]
        new_lines += "JPEGImages/" + image_name + ".jpg" + " " + "Annotations/" + image_name + ".xml" + "\n"
f.write(new_lines)
f.close()

f = open("./val.txt", 'w')
new_lines = ""
with open("./ImageSets/Main/val.txt", 'r') as ff:
    lines = ff.readlines()
    for line in lines:
        image_name = line.split("\n")[0]
        new_lines += "JPEGImages/" + image_name + ".jpg" + " " + "Annotations/" + image_name + ".xml" + "\n"
f.write(new_lines)
f.close()
12. Generate the LMDB data with the create_lmdb.sh script. It has to be run twice: once for the training LMDB and once for the validation/test LMDB. To build the latter, change "for subset in train" to val, and change trainval.txt near the bottom of the script to val.txt. (To resize, set width=608 and height=608; the width and height must match the input dimensions of the prototxt used later for training. Setting both to 0 means no resizing.) Script:
caffe_root=/data/chw/caffe_master
root_dir=/data/chw/refineDet_20200409/data
LINK_DIR=$root_dir/lmdb/
redo=1
db_dir="$root_dir/lmdb1/"
data_root_dir="$root_dir"
dataset_name="train"
mapfile="/data/chw/refineDet_20200409/data/labelmap_MyDataSet.prototxt"
anno_type="detection"
db="lmdb"
min_dim=0
max_dim=0
#width=608
#height=608
width=0
height=0

extra_cmd="--encode-type=jpg --encoded"
if [ $redo ]
then
    extra_cmd="$extra_cmd --redo"
fi
for subset in train
do
    python2 $caffe_root/scripts/create_annoset.py --anno-type=$anno_type --label-map-file=$mapfile --min-dim=$min_dim --max-dim=$max_dim --resize-width=$width --resize-height=$height --check-label $extra_cmd $data_root_dir /data/chw/refineDet_20200409/data/trainval.txt $db_dir/$db/$dataset_name"_"$subset"_"$db $LINK_DIR/$dataset_name
done
The labelmap_MyDataSet.prototxt file used by the script above is shown below. Its contents have to be adapted to each project: the first item is the background and stays unchanged; add one item per class after it, incrementing the label each time. Each item's name must match the class strings used in the annotations (here pedestrian, vehicle, biker), otherwise the conversion reports an unknown name, as in step 6.
item {
  name: "none_of_the_above"
  label: 0
  display_name: "background"
}
item {
  name: "pedestrian"
  label: 1
  display_name: "pedestrian"
}
item {
  name: "vehicle"
  label: 2
  display_name: "vehicle"
}
item {
  name: "biker"
  label: 3
  display_name: "biker"
}
The create_annoset.py script is listed below; the line sys.path.insert(0,'/data/chw/caffe_master/python') has to be adjusted to the local path.
import argparse
import os
import shutil
import subprocess
import sys

sys.path.insert(0, '/data/chw/caffe_master/python')
from caffe.proto import caffe_pb2
from google.protobuf import text_format

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Create AnnotatedDatum database")
    parser.add_argument("root",
            help="The root directory which contains the images and annotations.")
    parser.add_argument("listfile",
            help="The file which contains image paths and annotation info.")
    parser.add_argument("outdir",
            help="The output directory which stores the database file.")
    parser.add_argument("exampledir",
            help="The directory to store the link of the database files.")
    parser.add_argument("--redo", default=False, action="store_true",
            help="Recreate the database.")
    parser.add_argument("--anno-type", default="classification",
            help="The type of annotation {classification, detection}.")
    parser.add_argument("--label-type", default="xml",
            help="The type of label file format for detection {xml, json, txt}.")
    parser.add_argument("--backend", default="lmdb",
            help="The backend {lmdb, leveldb} for storing the result")
    parser.add_argument("--check-size", default=False, action="store_true",
            help="Check that all the datum have the same size.")
    parser.add_argument("--encode-type", default="",
            help="What type should we encode the image as ('png','jpg',...).")
    parser.add_argument("--encoded", default=False, action="store_true",
            help="The encoded image will be saved in datum.")
    parser.add_argument("--gray", default=False, action="store_true",
            help="Treat images as grayscale ones.")
    parser.add_argument("--label-map-file", default="",
            help="A file with LabelMap protobuf message.")
    parser.add_argument("--min-dim", default=0, type=int,
            help="Minimum dimension images are resized to.")
    parser.add_argument("--max-dim", default=0, type=int,
            help="Maximum dimension images are resized to.")
    parser.add_argument("--resize-height", default=0, type=int,
            help="Height images are resized to.")
    parser.add_argument("--resize-width", default=0, type=int,
            help="Width images are resized to.")
    parser.add_argument("--shuffle", default=False, action="store_true",
            help="Randomly shuffle the order of images and their labels.")
    parser.add_argument("--check-label", default=False, action="store_true",
            help="Check that there is no duplicated name/label.")

    args = parser.parse_args()
    root_dir = args.root
    list_file = args.listfile
    out_dir = args.outdir
    example_dir = args.exampledir

    redo = args.redo
    anno_type = args.anno_type
    label_type = args.label_type
    backend = args.backend
    check_size = args.check_size
    encode_type = args.encode_type
    encoded = args.encoded
    gray = args.gray
    label_map_file = args.label_map_file
    min_dim = args.min_dim
    max_dim = args.max_dim
    resize_height = args.resize_height
    resize_width = args.resize_width
    shuffle = args.shuffle
    check_label = args.check_label

    # check if root directory exists
    if not os.path.exists(root_dir):
        print("root directory: {} does not exist".format(root_dir))
        sys.exit()
    # add "/" to root directory if needed
    if root_dir[-1] != "/":
        root_dir += "/"
    # check if list file exists
    if not os.path.exists(list_file):
        print("list file: {} does not exist".format(list_file))
        sys.exit()
    # check list file format is correct
    with open(list_file, "r") as lf:
        for line in lf.readlines():
            img_file, anno = line.strip("\n").split(" ")
            if not os.path.exists(root_dir + img_file):
                print("image file: {} does not exist".format(root_dir + img_file))
            if anno_type == "classification":
                if not anno.isdigit():
                    print("annotation: {} is not an integer".format(anno))
            elif anno_type == "detection":
                if not os.path.exists(root_dir + anno):
                    print("annotation file: {} does not exist".format(root_dir + anno))
                    sys.exit()
            break
    # check if label map file exists
    if anno_type == "detection":
        if not os.path.exists(label_map_file):
            print("label map file: {} does not exist".format(label_map_file))
            sys.exit()
        label_map = caffe_pb2.LabelMap()
        lmf = open(label_map_file, "r")
        try:
            text_format.Merge(str(lmf.read()), label_map)
        except:
            print("Cannot parse label map file: {}".format(label_map_file))
            sys.exit()
    out_parent_dir = os.path.dirname(out_dir)
    if not os.path.exists(out_parent_dir):
        os.makedirs(out_parent_dir)
    if os.path.exists(out_dir) and not redo:
        print("{} already exists and I do not hear redo".format(out_dir))
        sys.exit()
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)

    # get caffe root directory
    caffe_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    if anno_type == "detection":
        cmd = "{}/build/tools/convert_annoset" \
              " --anno_type={}" \
              " --label_type={}" \
              " --label_map_file={}" \
              " --check_label={}" \
              " --min_dim={}" \
              " --max_dim={}" \
              " --resize_height={}" \
              " --resize_width={}" \
              " --backend={}" \
              " --shuffle={}" \
              " --check_size={}" \
              " --encode_type={}" \
              " --encoded={}" \
              " --gray={}" \
              " {} {} {}" \
              .format(caffe_root, anno_type, label_type, label_map_file, check_label,
                      min_dim, max_dim, resize_height, resize_width, backend, shuffle,
                      check_size, encode_type, encoded, gray, root_dir, list_file, out_dir)
    elif anno_type == "classification":
        cmd = "{}/build/tools/convert_annoset" \
              " --anno_type={}" \
              " --min_dim={}" \
              " --max_dim={}" \
              " --resize_height={}" \
              " --resize_width={}" \
              " --backend={}" \
              " --shuffle={}" \
              " --check_size={}" \
              " --encode_type={}" \
              " --encoded={}" \
              " --gray={}" \
              " {} {} {}" \
              .format(caffe_root, anno_type, min_dim, max_dim, resize_height,
                      resize_width, backend, shuffle, check_size, encode_type, encoded,
                      gray, root_dir, list_file, out_dir)
    print(cmd)
    process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)
    output = process.communicate()[0]

    if not os.path.exists(example_dir):
        os.makedirs(example_dir)
    link_dir = os.path.join(example_dir, os.path.basename(out_dir))
    if os.path.exists(link_dir):
        os.unlink(link_dir)
    os.symlink(out_dir, link_dir)
Part 2: Training
Once the LMDB is done, the next step is training. Training needs a train.prototxt and a solver.prototxt, and a pretrained model can optionally be added. The train.prototxt and solver.prototxt are company code and are not posted here; a generic sketch follows below for orientation.
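The actual files cannot be shared, but a minimal generic solver.prototxt might look like the following sketch (every value here is an illustrative assumption, not the company configuration, and must be tuned per project):

# Generic Caffe solver sketch; all values are placeholders.
train_net: "train.prototxt"
base_lr: 0.001
momentum: 0.9
weight_decay: 0.0005
lr_policy: "multistep"   # drop the learning rate at each stepvalue iteration
gamma: 0.1
stepvalue: 80000
stepvalue: 100000
max_iter: 120000
display: 20
snapshot: 10000
snapshot_prefix: "models/refinedet"
solver_mode: GPU
type: "SGD"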
Training is then launched from the command line: caffe/build/tools/caffe train --solver solver.prototxt --weights xxx.caffemodel &> log.txt
For example:
/data/caffe_s3fd-ssd/build/tools/caffe train --solver="./solver.prototxt" --weights="./_iter_300000.caffemodel" --gpu 1,2,3 >&log.txt
To keep training running in the background, use nohup, for example:
nohup /data/caffe_s3fd-ssd/build/tools/caffe train --solver="./solver.prototxt" --weights="./_iter_300000.caffemodel" --gpu 1,2,3 >&log.txt &
Training can also be resumed from a snapshot, for example:
# ./build/tools/caffe train -solver examples/mnist/lenet_solver.prototxt -snapshot examples/mnist/lenet_iter_5000.solverstate
Note that -snapshot and -weights cannot be used at the same time.
Part 3: Testing
The test code is C++ and belongs to the company, so it is not posted here.