YOLOv4的训练过程基本和YOLOv3相似,要完成目标检测,大部分时间会浪费在数据集的准备上;因此这里主要讲一下数据集的准备和制作,其次说一下训练过程。
一、制作VOC数据集
1.图片数据准备
图片数据的获取可以自己用高清摄像机采集视频,然后对其进行抽帧(视频>>图片),获得图片数据集;因为xml文件序号要与图片编号一一对应,这里文件重命名可以用代码完成序号重命名(00001,00002等)
2.标注数据
这里用的是比较常用的labelImg标注工具,每标注一张图片都会生成一个xml文件。labelImg可以去浏览器下载,这里给个我当时下载的链接labelImg下载
labelImg使用方法大家直接搜一下就可以了,很简单也很好用的工具,页面大概是这样
3.创建VOC文件夹
新建文件夹命名VOC2007,并且在VOC2007下新建Annotations,ImageSets和JPEGImages三个文件夹,在ImageSets文件夹下再建一个Main文件夹。Annotations里面放已经标注好的xml文件,JPEGImages里面放自己的图片数据(序号00001,00002,00003…)
二、训练准备
1.生成txt文件
在VOC2007文件夹里新建test.py,运行代码将会在Main文件夹里生成train.txt,val.txt,test.txt和trainval.txt四个文件。代码如下:
"""Split the VOC2007 annotation set into trainval/train/val/test lists.

Reads every xml file name from Annotations/ and writes the four index
files (one basename per line, no extension) into ImageSets/Main/.
"""
import os
import random

# Fraction of all samples drawn into the trainval pool, and the fraction
# of that pool drawn into the inner "train" sample.
# NOTE(review): with trainval_percent = 0.1 only 10% of the data reaches
# trainval.txt — confirm these ratios are the intended split.
trainval_percent = 0.1
train_percent = 0.9

xml_dir = 'Annotations'
txt_dir = os.path.join('ImageSets', 'Main')  # portable; was 'ImageSets\Main'

xml_files = os.listdir(xml_dir)
num = len(xml_files)
indices = range(num)  # renamed from `list`, which shadowed the builtin
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)

# Context managers guarantee the files are closed even if a write fails.
with open(os.path.join(txt_dir, 'trainval.txt'), 'w') as ftrainval, \
     open(os.path.join(txt_dir, 'test.txt'), 'w') as ftest, \
     open(os.path.join(txt_dir, 'train.txt'), 'w') as ftrain, \
     open(os.path.join(txt_dir, 'val.txt'), 'w') as fval:
    for i in indices:
        name = xml_files[i][:-4] + '\n'  # strip the ".xml" suffix
        if i in trainval:
            ftrainval.write(name)
            # NOTE(review): inside the trainval pool, the `train` sample
            # is written to test.txt and the remainder to val.txt — this
            # mirrors the widely copied original script; verify the
            # bucket names match what downstream training expects.
            if i in train:
                ftest.write(name)
            else:
                fval.write(name)
        else:
            ftrain.write(name)
做完这一步,VOC2007目录如下
2.convert.py
运行convert.py代码将预训练权重转化为keras用的h5文件
import os
import colorsys
from operator import itemgetter
from timeit import default_timer as timer

import numpy as np
import matplotlib.pyplot as plt
from keras import backend as K
from keras.models import load_model
from keras.layers import Input
from keras.utils import multi_gpu_model
from PIL import Image, ImageFont, ImageDraw

from yolo4.model import yolo_eval, yolo4_body
from yolo4.utils import letterbox_image
class Yolo4(object):
    """Build a YOLOv4 Keras model, convert darknet weights into it,
    and save the result as an .h5 file.

    Instantiating the class immediately runs the conversion (see
    ``__init__`` → ``load_yolo``).
    """

    def get_class(self):
        """Return the list of class names, one per line of self.classes_path."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    def get_anchors(self):
        """Read comma-separated anchor values from the first line of
        self.anchors_path and return them as an (N, 2) float array."""
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape(-1, 2)

    def load_yolo(self):
        """Construct the YOLOv4 body, stream the darknet weight file into
        its conv / batch-norm layers, save the Keras model to
        self.model_path, then build the eval graph (boxes/scores/classes).
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        self.class_names = self.get_class()
        self.anchors = self.get_anchors()

        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)

        # One distinct HSV-derived RGB color per class, for box drawing.
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

        self.sess = K.get_session()

        # Construct the model; weights are filled in below.
        self.yolo4_model = yolo4_body(Input(shape=(608, 608, 3)), num_anchors // 3, num_classes)

        # Read and convert the darknet weight file. It begins with a
        # header: major/minor/revision (3 x int32) followed by a "seen"
        # counter whose width depends on the file version.
        print('Loading weights.')
        with open(self.weights_path, 'rb') as weights_file:
            major, minor, revision = np.ndarray(
                shape=(3, ), dtype='int32', buffer=weights_file.read(12))
            if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
                seen = np.ndarray(shape=(1,), dtype='int64', buffer=weights_file.read(8))
            else:
                seen = np.ndarray(shape=(1,), dtype='int32', buffer=weights_file.read(4))
            print('Weights Header: ', major, minor, revision, seen)

            # Collect conv and batch-norm layers, keyed by the numeric
            # suffix Keras appends to auto-generated layer names, so they
            # can be matched against the sequential darknet weight stream.
            convs_to_load = []
            bns_to_load = []
            for i in range(len(self.yolo4_model.layers)):
                layer_name = self.yolo4_model.layers[i].name
                if layer_name.startswith('conv2d_'):
                    convs_to_load.append((int(layer_name[7:]), i))
                if layer_name.startswith('batch_normalization_'):
                    bns_to_load.append((int(layer_name[20:]), i))

            convs_sorted = sorted(convs_to_load, key=itemgetter(0))
            bns_sorted = sorted(bns_to_load, key=itemgetter(0))

            bn_index = 0
            for i in range(len(convs_sorted)):
                print('Converting ', i)
                if i == 93 or i == 101 or i == 109:
                    # The three YOLO output heads: conv with bias, no BN.
                    weights_shape = self.yolo4_model.layers[convs_sorted[i][1]].get_weights()[0].shape
                    # Output-filter count doubles as the bias vector length.
                    bias_shape = self.yolo4_model.layers[convs_sorted[i][1]].get_weights()[0].shape[3]
                    filters = bias_shape
                    size = weights_shape[0]
                    darknet_w_shape = (filters, weights_shape[2], size, size)
                    weights_size = int(np.prod(weights_shape))  # np.product is deprecated
                    conv_bias = np.ndarray(
                        shape=(filters, ),
                        dtype='float32',
                        buffer=weights_file.read(filters * 4))
                    conv_weights = np.ndarray(
                        shape=darknet_w_shape,
                        dtype='float32',
                        buffer=weights_file.read(weights_size * 4))
                    # darknet stores (out, in, h, w); Keras wants (h, w, in, out).
                    conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
                    self.yolo4_model.layers[convs_sorted[i][1]].set_weights([conv_weights, conv_bias])
                else:
                    # Regular conv: batch norm, no bias. In the darknet
                    # file the "bias" slot holds the BN beta (shift).
                    weights_shape = self.yolo4_model.layers[convs_sorted[i][1]].get_weights()[0].shape
                    size = weights_shape[0]
                    bn_shape = self.yolo4_model.layers[bns_sorted[bn_index][1]].get_weights()[0].shape
                    filters = bn_shape[0]
                    darknet_w_shape = (filters, weights_shape[2], size, size)
                    weights_size = int(np.prod(weights_shape))
                    conv_bias = np.ndarray(
                        shape=(filters, ),
                        dtype='float32',
                        buffer=weights_file.read(filters * 4))
                    bn_weights = np.ndarray(
                        shape=(3, filters),
                        dtype='float32',
                        buffer=weights_file.read(filters * 12))
                    # Keras BatchNormalization weight order: gamma, beta, mean, var.
                    bn_weight_list = [
                        bn_weights[0],  # scale gamma
                        conv_bias,      # shift beta
                        bn_weights[1],  # running mean
                        bn_weights[2],  # running var
                    ]
                    self.yolo4_model.layers[bns_sorted[bn_index][1]].set_weights(bn_weight_list)
                    conv_weights = np.ndarray(
                        shape=darknet_w_shape,
                        dtype='float32',
                        buffer=weights_file.read(weights_size * 4))
                    conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
                    self.yolo4_model.layers[convs_sorted[i][1]].set_weights([conv_weights])
                    bn_index += 1

        self.yolo4_model.save(self.model_path)

        if self.gpu_num >= 2:
            # NOTE(review): `multi_gpu_model` was referenced without an
            # import in the original file (NameError when gpu_num >= 2);
            # it must come from keras.utils.
            self.yolo4_model = multi_gpu_model(self.yolo4_model, gpus=self.gpu_num)

        self.input_image_shape = K.placeholder(shape=(2, ))
        self.boxes, self.scores, self.classes = yolo_eval(
            self.yolo4_model.output, self.anchors,
            len(self.class_names), self.input_image_shape,
            score_threshold=self.score)

    def __init__(self, score, iou, anchors_path, classes_path, model_path, weights_path, gpu_num=1):
        """Store configuration and immediately run the weight conversion.

        score: confidence threshold passed to yolo_eval.
        iou: IoU threshold; stored but not read in this file —
             presumably used by NMS elsewhere (TODO confirm).
        anchors_path / classes_path: text files read by the getters above.
        model_path: output .h5 path; weights_path: darknet .weights input.
        gpu_num: replicate the model across this many GPUs when >= 2.
        """
        self.score = score
        self.iou = iou
        self.anchors_path = anchors_path
        self.classes_path = classes_path
        self.weights_path = weights_path
        self.model_path = model_path
        self.gpu_num = gpu_num
        self.load_yolo()

    def close_session(self):
        """Release the underlying TensorFlow session."""
        self.sess.close()
if __name__ == '__main__':
    # Output .h5 model, anchor/class definition files, and the darknet
    # weights to convert.
    model_path = 'yolo4_weight.h5'
    anchors_path = 'model_data/yolo4_anchors.txt'
    classes_path = 'model_data/coco_classes.txt'
    weights_path = 'yolov4.weights'
    score = 0.5  # confidence threshold forwarded to yolo_eval
    iou = 0.5    # IoU threshold (stored on the model, see Yolo4.__init__)
    # (removed unused local `model_image_size`; the input size is fixed
    # at 608x608 inside Yolo4.load_yolo)
    # Constructing Yolo4 runs the whole conversion and saves model_path.
    yolo4_model = Yolo4(score, iou, anchors_path, classes_path, model_path, weights_path)
    yolo4_model.close_session()
3.修改运行voc_annotation代码
改一下第六行,然后运行voc_annotation代码,生成2007_train.txt,2007_test.txt,2007_val.txt
4.运行k-means
生成anchors大小,然后用这个数据替换yolo4_anchors.txt的数据。
5.对train.py的参数进行修改
因为可以修改的东西比较多,这里就不详细说明了,先把路径,文件输入正确,其他问题不大。
做到这一步就基本可以训练啦。