实习期间做了一个用YOLOv3检测铁柱缺损的工程,缺损类别只有四类,训练数据不多,用到了数据增强以及数据格式转化的代码。
首先用labelImg标注数据,生成每张图片(jpg或png格式)对应的xml标注文件(这一步是别的同事帮忙标的,我自己没有做过)。
因为检测的是比较贵重的首饰,所以用于训练的样本比较少,用了两个python数据拓展的代码:
1、图片旋转:
import cv2
import math
import numpy as np
import os
import pdb
import xml.etree.ElementTree as ET
class ImgAugemention():
    """Rotate images and their Pascal VOC bounding boxes for augmentation.

    ``process_img`` is the entry point: for every requested angle it rotates
    each ``.jpg`` image of a folder onto an enlarged canvas, rewrites the
    ``bndbox`` coordinates (and the ``size`` element) of the matching XML
    annotation, and saves both under ``<stem>_<angle>``.
    """

    def __init__(self):
        # Default angle in degrees; none of the methods below read it.
        self.angle = 90

    def _rotated_size(self, src, angle, scale=1.):
        """Return ``(nw, nh)``, the canvas size that holds ``src`` rotated.

        The values are rounded to 6 decimals because trigonometry on
        multiples of 90 degrees yields noise such as ``cos(90deg) = 6e-17``;
        without rounding, the later ``math.ceil`` turns e.g. 480.00000000000006
        into 481 and the output grows by one pixel.
        """
        w = src.shape[1]
        h = src.shape[0]
        rangle = np.deg2rad(angle)  # angle in radians
        nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
        nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale
        return round(float(nw), 6), round(float(nh), 6)

    def _rotation_matrix(self, src, angle, scale=1.):
        """Return ``(rot_mat, nw, nh)``: the 2x3 affine matrix and canvas size."""
        w = src.shape[1]
        h = src.shape[0]
        nw, nh = self._rotated_size(src, angle, scale)
        # Rotate around the centre of the NEW canvas ...
        rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
        # ... and add the rotated offset between the old and the new centre
        # to the translation column so the content stays centred.
        rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        return rot_mat, nw, nh

    def rotate_image(self, src, angle, scale=1.):
        """Return ``src`` rotated by ``angle`` degrees on an enlarged canvas.

        Args:
            src: Image array; ``shape[0]`` is the height, ``shape[1]`` the width.
            angle: Rotation angle in degrees, as taken by
                ``cv2.getRotationMatrix2D``.
            scale: Isotropic scale factor applied together with the rotation.
        """
        rot_mat, nw, nh = self._rotation_matrix(src, angle, scale)
        return cv2.warpAffine(
            src, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
            flags=cv2.INTER_LANCZOS4)

    def rotate_xml(self, src, xmin, ymin, xmax, ymax, angle, scale=1.):
        """Map a bounding box through the transform used by ``rotate_image``.

        The midpoints of the four box edges are transformed and the result of
        ``cv2.boundingRect`` over them is returned. For multiples of 90
        degrees this is the rotated box; for other angles it is smaller than
        the bounding box of the rotated corners.

        Returns:
            ``(x, y, w, h)`` as returned by ``cv2.boundingRect``.
        """
        rot_mat, _, _ = self._rotation_matrix(src, angle, scale)
        # Midpoints of the top, right, bottom and left edges (homogeneous).
        point1 = np.dot(rot_mat, np.array([(xmin + xmax) / 2, ymin, 1]))
        point2 = np.dot(rot_mat, np.array([xmax, (ymin + ymax) / 2, 1]))
        point3 = np.dot(rot_mat, np.array([(xmin + xmax) / 2, ymax, 1]))
        point4 = np.dot(rot_mat, np.array([xmin, (ymin + ymax) / 2, 1]))
        concat = np.vstack((point1, point2, point3, point4))
        # Round instead of truncating: a coordinate that comes out as
        # 99.99999999999999 because of float noise must become 100, not 99.
        concat = np.rint(concat).astype(np.int32)
        rx, ry, rw, rh = cv2.boundingRect(concat)
        return rx, ry, rw, rh

    def process_img(self, imgs_path, xmls_path, img_save_path, xml_save_path, angle_list):
        """Write a rotated copy of every ``.jpg`` and of its XML annotation.

        Args:
            imgs_path: Folder with the source images.
            xmls_path: Folder with ``<stem>.xml`` annotations.
            img_save_path: Output folder for rotated images (created if missing).
            xml_save_path: Output folder for rotated annotations (created if
                missing).
            angle_list: Iterable of rotation angles in degrees.

        Raises:
            Whatever ``ET.parse`` raises when an annotation is missing or
            malformed. Images that ``cv2.imread`` cannot read are skipped.
        """
        os.makedirs(img_save_path, exist_ok=True)
        os.makedirs(xml_save_path, exist_ok=True)
        for angle in angle_list:
            for img_name in os.listdir(imgs_path):
                # Split file name and suffix; only '.jpg' files are processed.
                n, s = os.path.splitext(img_name)
                if s != ".jpg":
                    continue
                img_path = os.path.join(imgs_path, img_name)
                img = cv2.imread(img_path)
                if img is None:
                    # cv2.imread signals an unreadable file by returning None.
                    print("[%s] %s could not be read, skipped." % (angle, img_name))
                    continue
                rotated_img = self.rotate_image(img, angle)
                out_stem = n + "_" + str(angle)
                cv2.imwrite(os.path.join(img_save_path, out_stem + ".jpg"), rotated_img)
                print("log: [%sd] %s is processed." % (angle, img_name))
                # Use the same stem as the output name so dotted file names
                # resolve to the right annotation.
                xml_path = os.path.join(xmls_path, n + '.xml')
                tree = ET.parse(xml_path)
                root = tree.getroot()
                # Keep <size> in sync with the rotated image: downstream
                # conversion normalises the boxes by this width/height.
                size = root.find('size')
                if size is not None:
                    new_dims = (('width', rotated_img.shape[1]),
                                ('height', rotated_img.shape[0]))
                    for tag, value in new_dims:
                        node = size.find(tag)
                        if node is not None:
                            node.text = str(value)
                for box in root.iter('bndbox'):
                    xmin = float(box.find('xmin').text)
                    ymin = float(box.find('ymin').text)
                    xmax = float(box.find('xmax').text)
                    ymax = float(box.find('ymax').text)
                    x, y, w, h = self.rotate_xml(img, xmin, ymin, xmax, ymax, angle)
                    # Store the rotated box back as corner coordinates.
                    box.find('xmin').text = str(x)
                    box.find('ymin').text = str(y)
                    box.find('xmax').text = str(x + w)
                    box.find('ymax').text = str(y + h)
                    box.set('updated', 'yes')
                tree.write(os.path.join(xml_save_path, out_stem + ".xml"))
                print("[%s] %s is processed." % (angle, img_name))
if __name__ == '__main__':
    # Input folders: source images and their XML annotations.
    imgs_path = 'data/images/'
    xmls_path = 'data/annotations'
    # Output folders for the rotated images and the matching annotations.
    img_save_path = 'data/images_enhance/'
    xml_save_path = 'data/ann_enhance/'
    # Every image is rotated once per angle (degrees).
    angle_list = [90, 180, 270]
    img_aug = ImgAugemention()
    img_aug.process_img(
        imgs_path=imgs_path,
        xmls_path=xmls_path,
        img_save_path=img_save_path,
        xml_save_path=xml_save_path,
        angle_list=angle_list,
    )
2、图片亮暗度调节以及高斯加噪操作:
import cv2
import numpy as np
import os
import xml.etree.ElementTree as ET
import copy
# 椒盐噪声
def SaltAndPepper(src, percetage=0.9):
    """Return a copy of ``src`` with salt-and-pepper noise.

    ``int(percetage * height * width)`` random (row, column, channel)
    positions are set to 0 (pepper) or 255 (salt) with equal probability.
    Positions are drawn with replacement, so the number of distinct samples
    changed can be lower than that count.

    Args:
        src: Image array indexed as ``[row, column, channel]``; the channel
            index is drawn from 0..2, so at least 3 channels are required.
        percetage: Number of noise samples as a fraction of ``height * width``.

    Returns:
        The noisy copy; ``src`` itself is not modified.
    """
    SP_NoiseImg = src.copy()
    height, width = src.shape[0], src.shape[1]
    noise_count = int(percetage * height * width)
    if noise_count <= 0:
        return SP_NoiseImg
    # The upper bound of randint is exclusive: use the full extent so the
    # last row/column can be hit and 1-pixel dimensions do not raise.
    rows = np.random.randint(0, height, size=noise_count)
    cols = np.random.randint(0, width, size=noise_count)
    channels = np.random.randint(0, 3, size=noise_count)
    # randint(0, 2) yields 0 or 1; randint(0, 1) would always yield 0 and
    # produce pepper only.
    is_pepper = np.random.randint(0, 2, size=noise_count) == 0
    SP_NoiseImg[rows, cols, channels] = np.where(is_pepper, 0, 255)
    return SP_NoiseImg
# 高斯噪声
# def addGaussianNoise(image, percetage):
# G_Noiseimg = image.copy()
# w = image.shape[1]
# h = image.shape[0]
# G_NoiseNum = int(percetage * image.shape[0] * image.shape[1])
# for i in range(G_NoiseNum):
# temp_x = np.random.randint(0, h)
# temp_y = np.random.randint(0, w)
# G_Noiseimg[temp_x][temp_y][np.random.randint(3)] = np.random.randn(1)[0]
# return G_Noiseimg
def gasuss_noise(image, mean=0, var=0.01):
    """Add Gaussian noise to an image.

    The image is divided by 255, noise drawn from a normal distribution with
    the given mean and variance is added, and the sum is clipped to [0, 1]
    before being scaled back by 255 and converted to ``uint8``.

    Args:
        image: Array that supports ``image / 255``.
        mean: Mean of the noise.
        var: Variance of the noise (its square root is used as the standard
            deviation).

    Returns:
        ``uint8`` array with the same shape as ``image``.
    """
    normalized = np.array(image / 255, dtype=float)
    sigma = var ** 0.5
    noisy = normalized + np.random.normal(mean, sigma, normalized.shape)
    clipped = np.clip(noisy, 0, 1)
    return np.uint8(clipped * 255)
# 昏暗
def darker(image, percetage=0.8):
    """Return a copy of ``image`` with its first three channels scaled down.

    Each value of channels 0..2 is multiplied by ``percetage`` and truncated
    to an integer. The computation is vectorised instead of looping over
    pixels in Python, and the result is clipped to [0, 255] so a factor
    above 1 saturates instead of overflowing.

    Args:
        image: Array indexed as ``[row, column, channel]``.
        percetage: Scale factor applied to every value.

    Returns:
        The scaled copy; ``image`` itself is not modified.
    """
    image_copy = image.copy()
    scaled = image[:, :, :3].astype(np.float64) * percetage
    # astype(int64) truncates toward zero, like int() on a single value.
    image_copy[:, :, :3] = np.clip(scaled.astype(np.int64), 0, 255)
    return image_copy
# 亮度增加
def brighter(image, percetage=1.5):
    """Return a copy of ``image`` with its first three channels scaled up.

    Each value of channels 0..2 is multiplied by ``percetage``, truncated to
    an integer and clipped to [0, 255]. The computation is vectorised
    instead of looping over pixels in Python.

    Args:
        image: Array indexed as ``[row, column, channel]``.
        percetage: Scale factor applied to every value.

    Returns:
        The scaled copy; ``image`` itself is not modified.
    """
    image_copy = image.copy()
    scaled = image[:, :, :3].astype(np.float64) * percetage
    # Truncate first, then clip: same order as int() followed by np.clip.
    image_copy[:, :, :3] = np.clip(scaled.astype(np.int64), 0, 255)
    return image_copy
# 旋转
def rotate(image, angle, center=None, scale=1.0):
    """Rotate ``image`` by ``angle`` degrees, keeping the original canvas size.

    Args:
        image: Image array; ``shape[:2]`` is ``(height, width)``.
        angle: Rotation angle passed to ``cv2.getRotationMatrix2D``.
        center: Rotation centre as ``(x, y)``; defaults to the image centre.
        scale: Scale factor applied together with the rotation.

    Returns:
        The warped image, ``width`` x ``height`` like the input.
    """
    height, width = image.shape[:2]
    # Fall back to the image centre when no pivot is given.
    pivot = (width / 2, height / 2) if center is None else center
    matrix = cv2.getRotationMatrix2D(pivot, angle, scale)
    return cv2.warpAffine(image, matrix, (width, height))
# 翻转
def flip(image):
    """Return ``image`` mirrored left to right (column order reversed)."""
    return np.fliplr(image)
# Input folders: source images and their XML annotations.
file_dir = 'data/images/'
xmls_path = 'data/annotations/'
# Output folders for the augmented images and the copied annotations.
save_dir = 'data/new_images_enhance/'
xml_save_path = 'data/new_ann_enhance/'
os.makedirs(save_dir, exist_ok=True)
os.makedirs(xml_save_path, exist_ok=True)

for file in os.listdir(file_dir):
    line = os.path.splitext(file)[0]
    img_path = file_dir + file
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for files it cannot decode.
        print("%s could not be read, skipped." % img_path)
        continue
    xml_path = xmls_path + line + '.xml'
    tree = ET.parse(xml_path)

    img_darker = darker(img)
    img_brighter = brighter(img)
    img_gasuss_noise = gasuss_noise(img)
    img_SaltAndPepper = SaltAndPepper(img)
    # None of these augmentations moves pixels, so the annotation is copied
    # unchanged next to each variant. Each file is written from its OWN
    # image: the noise variants must not reuse the darker/brighter arrays.
    variants = (
        ('_darker', img_darker),
        ('_brighter', img_brighter),
        ('_gasuss_noise', img_gasuss_noise),
        ('_SaltAndPepper', img_SaltAndPepper),
    )
    for suffix, augmented in variants:
        cv2.imwrite(save_dir + line + suffix + '.jpg', augmented)
        tree.write(xml_save_path + line + suffix + '.xml')
这其中有部分地方需要用到jpg格式的图片,所以自己还找了一段把png转换为jpg的代码:
import os
from PIL import Image
dirname_read = "png格式文件夹路径/"  # folder with the PNG files; keep the trailing slash
dirname_write = "jpg格式输出路径/"  # output folder for the JPG files
os.makedirs(dirname_write, exist_ok=True)
names = os.listdir(dirname_read)
count = 0
for name in names:
    # Check the extension BEFORE opening, so other files in the folder are
    # skipped instead of making Image.open fail.
    stem, ext = os.path.splitext(name)
    if ext.lower() != ".png":
        continue
    with Image.open(dirname_read + name) as png:
        # convert("RGB") drops the alpha channel of RGBA images and also
        # handles PNGs that have no alpha channel, where unpacking
        # img.split() into four bands raises ValueError.
        img = png.convert("RGB")
    name = stem + ".jpg"
    to_save_path = dirname_write + name
    img.save(to_save_path)
    count += 1
    print(to_save_path, "------conut:", count)
参考的是这位大神的帖子:
https://blog.csdn.net/weixin_44500897/article/details/88568279
数据做好以后就是生成训练集和测试集以及格式的转换了
# coding:utf-8
import os
import random
import argparse
from os import getcwd
parser = argparse.ArgumentParser()
# Folder whose file names define the samples; adjust to your own data.
parser.add_argument('--labels_path', default='./labels/', type=str, help='input xml label path')
# Folder that receives train.txt and valid.txt.
parser.add_argument('--txt_path', default='./Main/', type=str, help='output txt label path')
opt = parser.parse_args()

# Fraction of all samples that is used at all, and the share of those that
# goes to the training list (the rest goes to the validation list).
trainval_percent = 1.0
train_percent = 0.8
xmlfilepath = opt.labels_path
txtsavepath = opt.txt_path
total_xml = os.listdir(xmlfilepath)
os.makedirs(txtsavepath, exist_ok=True)

num = len(total_xml)
list_index = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(list_index, tv)
train = random.sample(trainval, tr)
# Sets give O(1) membership tests inside the loop below.
trainval_set = set(trainval)
train_set = set(train)

# The with-statement closes both list files even if a write fails.
with open(os.path.join(txtsavepath, 'train.txt'), 'w') as file_train, \
        open(os.path.join(txtsavepath, 'valid.txt'), 'w') as file_val:
    for i in list_index:
        # splitext drops the real extension whatever its length.
        name = 'data/custom/images/' + os.path.splitext(total_xml[i])[0] + '.jpg' + '\n'
        if i not in trainval_set:
            continue
        if i in train_set:
            file_train.write(name)
        else:
            file_val.write(name)
这里会生成train.txt和valid.txt两个文件,可以把train.txt理解为训练目录:训练时先读这个目录,再去找对应的图片和txt标注文件。(注意检查文件末尾是否有额外的空白行!!末尾只能多留一行空白行,留两行空白行会报错)
然后是将xml转换为txt格式:
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
# Split names; not referenced by the code visible in this file.
sets = ["train", "valid"]
# Class names; the class id written to a label file is the index in this
# list. Replace with your own classes.
classes = [
    "hollow",
    "gap",
    "nosmall",
    "nosphere",
]
# 原样保留。size为图片大小
# 将ROI的坐标转换为yolo需要的坐标
# size是图片的w和h
# box里保存的是ROI的坐标(x,y的最大值和最小值)
# 返回值为ROI中心点相对于图片大小的比例坐标,和ROI的w、h相对于图片大小的比例
def convert(size, box):
    """Convert a pixel box to centre/size values relative to the image.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the box in pixels.

    Returns:
        ``(x, y, w, h)``: the box centre (shifted by -1 pixel on both axes)
        and the box width/height, each multiplied by the reciprocal of the
        image width or height.
    """
    inv_w = 1. / (size[0])
    inv_h = 1. / (size[1])
    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]
    center_x = (x_min + x_max) / 2.0 - 1
    center_y = (y_min + y_max) / 2.0 - 1
    box_w = x_max - x_min
    box_h = y_max - y_min
    return (center_x * inv_w, center_y * inv_h, box_w * inv_w, box_h * inv_h)
def convert_annotation(image_add,
                       xml_dir='/home/ubuntu/mydata/生成labels/annotations/',
                       label_dir='/home/ubuntu/mydata/生成labels/labels/'):
    """Write the label ``.txt`` file for one image from its XML annotation.

    One line ``"<class_id> <x> <y> <w> <h>"`` is written per ``object`` whose
    name is in ``classes`` and whose ``difficult`` flag is not 1; the four
    numbers come from ``convert``. If the annotation has no ``size`` element
    the label file is created empty.

    Args:
        image_add: Image path or file name, e.g. a line of the train list;
            surrounding whitespace (including the newline) is ignored.
        xml_dir: Folder containing ``<stem>.xml``; change to your own input
            folder.
        label_dir: Folder that receives ``<stem>.txt``; change to your own
            output folder.

    Raises:
        Whatever ``ET.parse`` raises for a missing or malformed XML file.
        ZeroDivisionError: from ``convert`` when the annotation stores a
            width or height of 0 and an object has to be written.
    """
    # Keep only the file name and drop its extension. splitext keeps dots
    # inside the stem and leaves extension-less names intact.
    file_name = os.path.split(image_add.strip())[1]
    image_add = os.path.splitext(file_name)[0]

    # Parsing by path lets ElementTree open, decode and close the file.
    tree = ET.parse(os.path.join(xml_dir, image_add + '.xml'))
    root = tree.getroot()
    # Compare with None: truth-testing an Element depends on whether it has
    # children and is deprecated.
    size = root.find('size')

    with open(os.path.join(label_dir, '%s.txt' % (image_add)), 'w') as out_file:
        if size is None:
            return
        w = int(size.find('width').text)
        h = int(size.find('height').text)
        # Iterate over every object of the annotation.
        for obj in root.iter('object'):
            difficult_node = obj.find('difficult')
            # A missing <difficult> tag is treated as "not difficult".
            difficult = difficult_node.text if difficult_node is not None else 0
            cls = obj.find('name').text
            # Skip objects of unknown classes and objects flagged difficult.
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # b is (xmin, xmax, ymin, ymax) in pixels.
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
# List of training images, one path per line; change to your own list file.
with open('/home/ubuntu/mydata/生成labels/Main/train.txt') as image_adds:
    for image_add in image_adds:
        image_add = image_add.strip()
        # A blank line (e.g. extra newlines at the end of the list) has no
        # annotation to convert.
        if not image_add:
            continue
        convert_annotation(image_add)
最后数据处理完就是修改网络里面一些配置文件以及路径,因为不同版本文件不太一样,总的来说修改以下几点:
1、类别数,train.txt和valid.txt以及对应的路径信息
2、在classes.names里面修改类别名字(也是只留一个空白行,不留空白行最后一个类别无法读入!)
3、修改网络配置文件里filters的值,一共有三处:可以ctrl+F查找yolo关键词,把每个YOLO层上一个卷积层的filters(即该YOLO层前面最后一个filters)改为3*(5+类别数),例如本文4个类别时就是3*(5+4)=27
4、无论是train.py test.py以及detect.py都需要修改def main开始前面那些读取配置文件的路径,同时看好train.py里面训练好保存的权重文件在哪里。
最后把对应的数据放入指定文件中就可以开始训练了。