1.下载darknet源码
git clone https://github.com/pjreddie/darknet
cd darknet
2.修改配置文件Makefile
GPU=1 #如果使用GPU设置为1,CPU设置为0
CUDNN=1 #如果使用CUDNN设置为1,否则为0
OPENCV=0 #如果调用摄像头,还需要设置OPENCV为1,否则为0
OPENMP=0 #如果使用OPENMP设置为1,否则为0
DEBUG=0 #如果使用DEBUG设置为1,否则为0
4.下载yolov3预训练模型
wget https://pjreddie.com/media/files/yolov3.weights
5.测试
./darknet detector test cfg/coco.data cfg/yolov3.cfg yolov3.weights data/dog.jpg
在darknet目录下创建**mask**文件夹,目录结构如下,将之前标注好的图片和xml文件放到对应目录下
会在./mask目录下生成一个labels文件夹一个txt文件(mask_train.txt)(内容是: 类别的编码和目标的相对位置)。
..JPEGImages#存放图像
...Annotations#存放图像对应的xml文件
...ImageSets/Main
在darknet目录下创建my_label.py 用来将voc2007格式数据转换成yolo训练的格式
# coding=UTF-8
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
# 源代码sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
sets = [('mask', 'train'),('mask', 'test')] # 改成自己建立的myData
classes = ["have_mask", "no_mask"] # 改成自己的类别
wd ='/home/wei/darknet'
def convert(size, box):
dw = 1. / (size[0])
dh = 1. / (size[1])
x = (box[0] + box[1]) / 2.0 - 1
y = (box[2] + box[3]) / 2.0 - 1
w = box[1] - box[0]
h = box[3] - box[2]
x = x * dw
w = w * dw
y = y * dh
h = h * dh
return (x, y, w, h)
def convert_annotation(year, image_id):
in_file = open('/home/wei/darknet/mask/Annotations/%s.xml' % (image_id)) # 源代码VOCdevkit/VOC%s/Annotations/%s.xml
out_file = open('/home/wei/darknet/mask/labels/%s.txt' % (image_id), 'w') # 源代码VOCdevkit/VOC%s/labels/%s.txt
tree = ET.parse(in_file)
root = tree.getroot()
size = root.find('size')
w = int(size.find('width').text)
h = int(size.find('height').text)
for obj in root.iter('object'):
difficult = obj.find('difficult').text
cls = obj.find('name').text
if cls not in classes or int(difficult) == 1:
continue
cls_id = classes.index(cls)
xmlbox = obj.find('bndbox')
b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
float(xmlbox.find('ymax').text))
bb = convert((w, h), b)
out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
for year, image_set in sets:
if not os.path.exists('/home/wei/darknet/mask/labels/'): # 改成自己建立的myData
os.makedirs('/home/wei/darknet/mask/labels/')
image_ids = open('/home/wei/darknet/mask/ImageSets/Main/%s.txt' % (image_set)).read().strip().split()
list_file = open('/home/wei/darknet/mask/%s_%s.txt' % (year, image_set), 'w')
for image_id in image_ids:
list_file.write('%s/mask/JPEGImages/%s.jpg\n' % (wd, image_id))
convert_annotation(year, image_id)
list_file.close()
4.修改darknet/cfg下的voc.data和yolov3-voc.cfg文件
为了保险起见,复制这两个文件,并分别重命名为mask.data和mask_yolov3.cfg
maks.data内容:
classes= 2
train = /home/wei/darknet/mask/mask_train.txt
valid = /home/wei/darknet/mask/mask_test.txt
names = /home/wei/darknet/mask/mask.names
backup = backup
mask_yolov3.cfg的内容:
/yolo, 总共会搜出3个含有yolo的地方。
每个地方都必须要改2处, filters:3*(5+len(classes));
其中:classes: len(classes) = 3,这里以我的工程为例
filters = 24
classes = 3
可修改:random = 1:原来是1,显存小改为0。(是否要多尺度输出。)
6.在mask文件夹下新建mask.names文件
have_mask
no_mask
7.下载预训练权重
wget https://pjreddie.com/media/files/darknet53.conv.74
8.开始训练
./darknet detector train cfg/my_data.data cfg/my_yolov3.cfg darknet53.conv.74
或者指定gpu训练,默认使用gpu0
计算测试后的mAP
在darknet在创建voc_eval.py
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Bharath Hariharan
# --------------------------------------------------------
import xml.etree.ElementTree as ET
import os
import cPickle
import numpy as np
def parse_rec(filename):
""" Parse a PASCAL VOC xml file """
tree = ET.parse(filename)
objects = []
for obj in tree.findall('object'):
obj_struct = {}
obj_struct['name'] = obj.find('name').text
obj_struct['pose'] = obj.find('pose').text
obj_struct['truncated'] = int(obj.find('truncated').text)
obj_struct['difficult'] = int(obj.find('difficult').text)
bbox = obj.find('bndbox')
obj_struct['bbox'] = [int(bbox.find('xmin').text),
int(bbox.find('ymin').text),
int(bbox.find('xmax').text),
int(bbox.find('ymax').text)]
objects.append(obj_struct)
return objects
def voc_ap(rec, prec, use_07_metric=False):
""" ap = voc_ap(rec, prec, [use_07_metric])
Compute VOC AP given precision and recall.
If use_07_metric is true, uses the
VOC 07 11 point method (default:False).
"""
if use_07_metric:
# 11 point metric
ap = 0.
for t in np.arange(0., 1.1, 0.1):
if np.sum(rec >= t) == 0:
p = 0
else:
p = np.max(prec[rec >= t])
ap = ap + p / 11.
else:
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], rec, [1.]))
mpre = np.concatenate(([0.], prec, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
def voc_eval(detpath,
annopath,
imagesetfile,
classname,
cachedir,
ovthresh=0.5,
use_07_metric=False):
"""rec, prec, ap = voc_eval(detpath,
annopath,
imagesetfile,
classname,
[ovthresh],
[use_07_metric])
Top level function that does the PASCAL VOC evaluation.
detpath: Path to detections
detpath.format(classname) should produce the detection results file.
annopath: Path to annotations
annopath.format(imagename) should be the xml annotations file.
imagesetfile: Text file containing the list of images, one image per line.
classname: Category name (duh)
cachedir: Directory for caching the annotations
[ovthresh]: Overlap threshold (default = 0.5)
[use_07_metric]: Whether to use VOC07's 11 point AP computation
(default False)
"""
# assumes detections are in detpath.format(classname)
# assumes annotations are in annopath.format(imagename)
# assumes imagesetfile is a text file with each line an image name
# cachedir caches the annotations in a pickle file
# first load gt
if not os.path.isdir(cachedir):
os.mkdir(cachedir)
cachefile = os.path.join(cachedir, 'annots.pkl')
# read list of images
with open(imagesetfile, 'r') as f:
lines = f.readlines()
imagenames = [x.strip() for x in lines]
if not os.path.isfile(cachefile):
# load annots
recs = {}
for i, imagename in enumerate(imagenames):
recs[imagename] = parse_rec(annopath.format(imagename))
if i % 100 == 0:
print 'Reading annotation for {:d}/{:d}'.format(
i + 1, len(imagenames))
# save
print 'Saving cached annotations to {:s}'.format(cachefile)
with open(cachefile, 'w') as f:
cPickle.dump(recs, f)
else:
# load
with open(cachefile, 'r') as f:
recs = cPickle.load(f)
# extract gt objects for this class
class_recs = {}
npos = 0
for imagename in imagenames:
R = [obj for obj in recs[imagename] if obj['name'] == classname]
bbox = np.array([x['bbox'] for x in R])
difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
det = [False] * len(R)
npos = npos + sum(~difficult)
class_recs[imagename] = {'bbox': bbox,
'difficult': difficult,
'det': det}
# read dets
detfile = detpath.format(classname)
with open(detfile, 'r') as f:
lines = f.readlines()
splitlines = [x.strip().split(' ') for x in lines]
image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
# sort by confidence
sorted_ind = np.argsort(-confidence)
sorted_scores = np.sort(-confidence)
BB = BB[sorted_ind, :]
image_ids = [image_ids[x] for x in sorted_ind]
# go down dets and mark TPs and FPs
nd = len(image_ids)
tp = np.zeros(nd)
fp = np.zeros(nd)
for d in range(nd):
R = class_recs[image_ids[d]]
bb = BB[d, :].astype(float)
ovmax = -np.inf
BBGT = R['bbox'].astype(float)
if BBGT.size > 0:
# compute overlaps
# intersection
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
ixmax = np.minimum(BBGT[:, 2], bb[2])
iymax = np.minimum(BBGT[:, 3], bb[3])
iw = np.maximum(ixmax - ixmin + 1., 0.)
ih = np.maximum(iymax - iymin + 1., 0.)
inters = iw * ih
# union
uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
(BBGT[:, 2] - BBGT[:, 0] + 1.) *
(BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
overlaps = inters / uni
ovmax = np.max(overlaps)
jmax = np.argmax(overlaps)
if ovmax > ovthresh:
if not R['difficult'][jmax]:
if not R['det'][jmax]:
tp[d] = 1.
R['det'][jmax] = 1
else:
fp[d] = 1.
else:
fp[d] = 1.
# compute precision recall
fp = np.cumsum(fp)
tp = np.cumsum(tp)
rec = tp / float(npos)
# avoid divide by zero in case the first detection matches a difficult
# ground truth
prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
ap = voc_ap(rec, prec, use_07_metric)
return rec, prec, ap
创建computer_Single_ALL_mAP.py
from voc_eval import voc_eval
import os
current_path = os.getcwd()
results_path = current_path+"/results"
sub_files = os.listdir(results_path)
mAP = []
for i in range(len(sub_files)):
class_name = sub_files[i].split(".txt")[0]
rec, prec, ap = voc_eval('/home/wei/darknet/results/{}.txt',
'/home/wei/darknet/mask/Annotations/{}.xml',
'/home/wei/darknet/mask/ImageSets/Main/test.txt',
class_name, '.')
print("mAP for {} :\t {} ".format(class_name, ap))
mAP.append(ap)
mAP = tuple(mAP)
print("--------------")
print("mAP for all:\t {}".format( float( sum(mAP)/len(mAP)) ))
print("--------------")
绘制loss曲线
./darknet detector train mask/mask.data mask/mask.cfg darknet53.conv.74 2>1 | tee visualization/train_yolov3.log
在使用脚本绘制变化曲线之前,需要先使用extract_log.py脚本,格式化log,用生成的新的log文件供可视化工具绘图,格式化log的extract_log.py脚本如下(和生成的log文件同一目录):
# coding=utf-8
# 该文件用来提取训练log,去除不可解析的log后使log文件格式化,生成新的log文件供可视化工具绘图
import inspect
import os
import random
import sys
def extract_log(log_file,new_log_file,key_word):
with open(log_file, 'r') as f:
with open(new_log_file, 'w') as train_log:
#f = open(log_file)
#train_log = open(new_log_file, 'w')
for line in f:
# 去除多gpu的同步log
if 'Syncing' in line:
continue
# 去除除零错误的log
if 'nan' in line:
continue
if key_word in line:
train_log.write(line)
f.close()
train_log.close()
extract_log('train_yolov3.log','train_log_loss.txt','images')
extract_log('train_yolov3.log','train_log_iou.txt','IOU')
运行之后,会解析log文件的loss行和iou行得到两个txt文件
使用train_loss_visualization.py脚本可以绘制loss变化曲线
train_loss_visualization.py脚本如下(也是同一目录新建py文件):
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#%matplotlib inline
lines =5124 #改为自己生成的train_log_loss.txt中的行数
result = pd.read_csv('train_log_loss.txt', skiprows=[x for x in range(lines) if ((x%10!=9) |(x<1000))] ,error_bad_lines=False, names=['loss', 'avg', 'rate', 'seconds', 'images'])
result.head()
result['loss']=result['loss'].str.split(' ').str.get(1)
result['avg']=result['avg'].str.split(' ').str.get(1)
result['rate']=result['rate'].str.split(' ').str.get(1)
result['seconds']=result['seconds'].str.split(' ').str.get(1)
result['images']=result['images'].str.split(' ').str.get(1)
result.head()
result.tail()
# print(result.head())
# print(result.tail())
# print(result.dtypes)
print(result['loss'])
print(result['avg'])
print(result['rate'])
print(result['seconds'])
print(result['images'])
result['loss']=pd.to_numeric(result['loss'])
result['avg']=pd.to_numeric(result['avg'])
result['rate']=pd.to_numeric(result['rate'])
result['seconds']=pd.to_numeric(result['seconds'])
result['images']=pd.to_numeric(result['images'])
result.dtypes
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(result['avg'].values,label='avg_loss')
# ax.plot(result['loss'].values,label='loss')
ax.legend(loc='best') #图列自适应位置
ax.set_title('The loss curves')
ax.set_xlabel('batches')
fig.savefig('avg_loss')
# fig.savefig('loss')