https://github.com/YunYang1994/tensorflow-yolov3
这个是大佬的源代码,拿来直接用就可以,首先需要两个数据集,一个是测试用的,一个是训练用的。
然后我们对这两个数据集进行一些文档生成,主要是core/config文件里面所需要的一些文档
首先是voc_train.txt,每行的格式为:图片路径 xmin,ymin,xmax,ymax,类别编号(左上角坐标、右下角坐标和类别编号之间用逗号分隔;一张图片有多个目标时,以空格分隔依次追加)
那么首先需要生成类名文件
需要一个脚本去读取annotations文件夹,生成类名文件保存在data/classes目录下
import os
import re
# xml.etree.cElementTree was deprecated in Python 3.3 and removed in 3.9;
# ElementTree picks up the C accelerator automatically, so the alias is kept.
import xml.etree.ElementTree as et

annotations_path = "D:/Tensorflow/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/Annotations/"
# NOTE(review): the later scripts in this file read
# ".../data/classes/voc_2007_nov/my_voc.names" -- confirm which location is
# the intended one, or move the generated file there before running them.
class_names_path = 'D:/Tensorflow/tensorflow-yolov3-master/data/classes/my_voc.names'
save_all_class_name = []


def collect_class_names(annotations_dir):
    """Return the distinct object class names found in VOC-style XML files.

    Every ``*.xml`` file directly inside ``annotations_dir`` is parsed and the
    text of each ``object/name`` element is collected.  Names are returned in
    first-seen order; files are visited in sorted order so the resulting class
    order does not depend on the order ``os.listdir`` happens to return.

    Args:
        annotations_dir: Directory containing the annotation XML files.

    Returns:
        A list of unique class-name strings.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read.
        xml.etree.ElementTree.ParseError: If an ``.xml`` file is malformed.
    """
    names = []
    seen = set()
    for entry in sorted(os.listdir(annotations_dir)):
        # Skip stray non-annotation files instead of failing to parse them.
        if not entry.lower().endswith(".xml"):
            continue
        root = et.parse(os.path.join(annotations_dir, entry)).getroot()
        for obj in root.findall("object"):
            for name_node in obj.findall("name"):
                class_name = name_node.text
                # An empty <name/> has text None; it cannot be written out.
                if class_name and class_name not in seen:
                    seen.add(class_name)
                    names.append(class_name)
    return names


def write_class_names(names, names_file):
    """Write one class name per line to ``names_file``, replacing old content.

    The file is truncated on purpose: appending would duplicate every class
    each time the script is re-run and shift the class indices.
    """
    with open(names_file, 'w') as f_names:
        f_names.writelines(name + "\n" for name in names)


def generate_class_names_file():
    """Scan ``annotations_path`` and write the class list to ``class_names_path``."""
    save_all_class_name[:] = collect_class_names(annotations_path)
    write_class_names(save_all_class_name, class_names_path)


if __name__ == "__main__":
    generate_class_names_file()
生成一个my_voc_train的文件:用一个脚本读取voc2007数据集里面的annotations文件夹,然后生成训练标注文件(下面的示例脚本读取的是测试集目录并写入my_voc_test.txt;生成训练集标注时,把路径换成训练集目录、输出文件名换成my_voc_train.txt即可)
# This snippet uses os and ElementTree, so it imports them itself and can be
# run as a standalone script.
import os
import xml.etree.ElementTree as et

annotations_path = "D:/Tensorflow/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/Annotations/"
images_path = "D:/Tensorflow/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages/"
class_names_path = 'D:/Tensorflow/tensorflow-yolov3-master/data/classes/voc_2007_nov/my_voc.names'
annotation_output_path = "D:/Tensorflow/tensorflow-yolov3-master/data/dataset/my_voc_test.txt"
save_all_class_name = []
all_class_name = []
dict_all_class_name = {}
str_all_line = []


def read_class_names(names_file):
    """Return the class names stored one per line in ``names_file``.

    Reading stops at the first empty line (or at end of file), so anything
    after a blank line is ignored.
    """
    names = []
    with open(names_file, 'r') as f:
        for raw_line in f:
            name = raw_line.rstrip("\n")
            if not name:
                break
            names.append(name)
    return names


def build_annotation_line(xml_root, image_dir, class_to_id, seen_names=None):
    """Build one annotation line for a parsed VOC-style annotation.

    The line has the form ``<image_dir><filename> x1,y1,x2,y2,id ...`` with one
    comma-separated group per ``object`` element, groups separated by single
    spaces and no trailing space.

    Args:
        xml_root: Root element of the parsed annotation XML.
        image_dir: Prefix prepended verbatim to the ``filename`` text.
        class_to_id: Mapping from class name to integer class index.
        seen_names: Optional list; each class name encountered is appended to
            it once, in first-seen order.

    Returns:
        The annotation line, without a trailing newline.

    Raises:
        KeyError: If an object's class name is not in ``class_to_id``.
        ValueError: If a ``bndbox`` lacks one of its four coordinates.
    """
    image_file = "".join(node.text or "" for node in xml_root.findall("filename"))
    fields = [image_dir + image_file]
    for obj in xml_root.findall("object"):
        box = obj.find("bndbox")
        class_name = obj.findtext("name")
        # An object without a box or a name cannot form a valid entry.
        if box is None or not class_name:
            continue
        coords = [box.findtext(tag) for tag in ("xmin", "ymin", "xmax", "ymax")]
        if any(not value for value in coords):
            raise ValueError(
                "incomplete bndbox for %r in %s" % (class_name, image_file))
        if class_name not in class_to_id:
            raise KeyError(
                "class %r in %s is not listed in the class names file"
                % (class_name, image_file))
        if seen_names is not None and class_name not in seen_names:
            seen_names.append(class_name)
        fields.append(",".join(coords + [str(class_to_id[class_name])]))
    return " ".join(fields)


def write_annotation_lines(lines, output_file):
    """Write ``lines`` to ``output_file``, one per line, replacing old content.

    The file is truncated on purpose: appending would duplicate every
    annotation each time the script is re-run.
    """
    with open(output_file, 'w') as ff:
        ff.writelines(line + "\n" for line in lines)


def generate_annotation_file():
    """Convert every XML file in ``annotations_path`` into the annotation file."""
    all_class_name[:] = read_class_names(class_names_path)
    dict_all_class_name.clear()
    dict_all_class_name.update(
        (name, index) for index, name in enumerate(all_class_name))
    print(dict_all_class_name)

    del str_all_line[:]
    # Sorted so the output order is the same on every run.
    for entry in sorted(os.listdir(annotations_path)):
        if not entry.lower().endswith(".xml"):
            continue
        root = et.parse(os.path.join(annotations_path, entry)).getroot()
        for node in root.findall("filename"):
            print(node.text)
        str_all_line.append(build_annotation_line(
            root, images_path, dict_all_class_name, save_all_class_name))

    write_annotation_lines(str_all_line, annotation_output_path)


if __name__ == "__main__":
    generate_annotation_file()
同样用来生成测试集的标注文件,名字自命名,生成如下内容
如果想检测一下生成的标注文件是否正确,可以用脚本测试一下
import cv2
import os
import re
all_class_name = []
dict_all_class_name = {}

class_names_path = 'D:/Tensorflow/tensorflow-yolov3-master/data/classes/voc_2007_nov/my_voc.names'
annotation_file_path = 'D:/Tensorflow/tensorflow-yolov3-master/data/dataset/my_voc_test.txt'
images_path = "D:/Tensorflow/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages/"

# Compiled once; raw strings so "\d" is a regex class, not a string escape.
IMAGE_NAME_PATTERN = re.compile(r'(\d+\.jpg)')
BOX_PATTERN = re.compile(r'(\d+),(\d+),(\d+),(\d+),(\d+)')


def read_class_names(names_file):
    """Return the class names stored one per line in ``names_file``.

    Reading stops at the first empty line (or at end of file).
    """
    names = []
    with open(names_file, 'r') as f:
        for raw_line in f:
            name = raw_line.rstrip("\n")
            if not name:
                break
            names.append(name)
    return names


def parse_annotation_line(line):
    """Split one annotation line into its image file name and boxes.

    Args:
        line: A line of the form ``<path>/<digits>.jpg x1,y1,x2,y2,id ...``.

    Returns:
        ``None`` when the line contains no ``<digits>.jpg`` file name,
        otherwise ``(image_name, boxes)`` where ``boxes`` is a list of
        ``(x_min, y_min, x_max, y_max, class_id)`` integer tuples.
    """
    name_match = IMAGE_NAME_PATTERN.search(line)
    if name_match is None:
        return None
    # Only look for boxes after the file name so digits in the path are ignored.
    remainder = line[name_match.end():]
    boxes = [tuple(int(value) for value in groups)
             for groups in BOX_PATTERN.findall(remainder)]
    return name_match.group(1), boxes


def label_origin(x_min, y_min):
    """Return the text origin for a box label: just above the top-left corner.

    The y coordinate is clamped so the label stays inside the image when the
    box touches the top edge.
    """
    return (x_min, max(y_min - 5, 15))


def verify_annotations():
    """Show every annotated image with its boxes and class labels drawn on it.

    Each image is displayed for two seconds.  Images that cannot be loaded
    are reported and skipped.
    """
    all_class_name[:] = read_class_names(class_names_path)
    dict_all_class_name.clear()
    dict_all_class_name.update(enumerate(all_class_name))
    print(dict_all_class_name)

    font = cv2.FONT_HERSHEY_SIMPLEX
    with open(annotation_file_path, 'r') as f:
        for line in f:
            parsed = parse_annotation_line(line)
            if parsed is None:
                continue
            image_name, boxes = parsed
            image = cv2.imread(images_path + image_name)
            # cv2.imread returns None instead of raising when loading fails.
            if image is None:
                print("cannot read image: " + images_path + image_name)
                continue
            for x_min, y_min, x_max, y_max, class_id in boxes:
                # Show unknown ids instead of crashing so mismatches are visible.
                label = dict_all_class_name.get(class_id, "unknown:%d" % class_id)
                cv2.putText(image, label, label_origin(x_min, y_min),
                            font, 0.8, (0, 0, 255), 2)
                cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
            cv2.imshow(image_name, image)
            cv2.waitKey(2000)
            cv2.destroyAllWindows()


if __name__ == "__main__":
    verify_annotations()
效果如下
,确认没有问题,这样就可以对数据集进行训练了,不想改网络模型的话直接打开train.py开始训练,训练之前需要改一下config文件
这些是之前生成的,另外需要根据自己的显卡改一下batch size;对于voc数据集和darknet,我的2080显卡上batch size设为6
然后就可以train了,时间大约半天多,看自己的显卡和数据集。
train完后会生成一些ckpt文件
需要把ckpt文件转为pb文件,调用
生成
使用的时候就可以直接使用pb文件进行识别了,使用的时候把num_class改成自己数据集的num_class
感觉速度不是很快,而且对于工业界的大像素目标检测效果并不好,正在测试用shufflenet2取代darknet53,针对5000*5000像素的图片,奈何没有标注人员,只能改日再写了