自己训练YOLOv3数据

bandieyanzhi

已于 2022-06-08 09:10:37 修改

阅读量394

点赞数

文章标签：计算机视觉 opencv python

于 2022-05-16 22:26:42 首次发布

本文链接：https://blog.csdn.net/bandieyanzhi/article/details/124809326

版权

实习期间做了一个用YOLOv3检测金珠缺损的工程，缺损类别只有四类，训练数据不多，用到了数据增强以及数据格式转化的代码。

首先用labelImg标注数据，生成xml标注文件，对应的图片为jpg或者png格式（是别的同事帮忙标的，这个我就没做过了）

因为检测的是比较贵重的首饰，所以用于训练的样本比较少，用了两个python数据拓展的代码：
1、图片旋转：

import cv2
import math
import numpy as np
import os
import pdb
import xml.etree.ElementTree as ET


class ImgAugemention():
    """Rotate images together with their Pascal VOC style xml annotations.

    ``process_img`` is the entry point: for every ``.jpg`` in a folder and
    every requested angle it writes a rotated copy of the image and an xml
    file whose ``bndbox`` coordinates (and ``size``) match the rotated image.
    """

    def __init__(self):
        self.angle = 90

    def _rotation_geometry(self, src, angle, scale=1.):
        """Return ``(rot_mat, nw, nh)`` for rotating ``src`` by ``angle`` degrees.

        ``rot_mat`` is the 2x3 affine matrix that rotates around the centre
        and shifts the result so that the whole rotated image fits into a
        canvas of ``nw`` x ``nh`` pixels.
        """
        w = src.shape[1]
        h = src.shape[0]
        rangle = np.deg2rad(angle)  # angle in radians
        # size of the canvas that contains the whole rotated image
        nw = (abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale
        nh = (abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale
        # sin/cos of right angles are off by ~1e-16; without rounding, the
        # later ceil() can enlarge the canvas by one pixel (e.g. 480 -> 481)
        nw = round(float(nw), 6)
        nh = round(float(nh), 6)
        # ask OpenCV for the rotation matrix around the new centre
        rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale)
        # move from the old centre to the new centre, combined with the
        # rotation; it only affects the translation column of the matrix
        rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5, 0]))
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        return rot_mat, nw, nh

    def rotate_image(self, src, angle, scale=1.):
        """Return ``src`` rotated by ``angle`` degrees on an enlarged canvas.

        Args:
            src: image array; ``shape[0]`` is the height, ``shape[1]`` the width.
            angle: rotation angle in degrees, passed to
                ``cv2.getRotationMatrix2D``.
            scale: isotropic scale factor applied together with the rotation.
        """
        rot_mat, nw, nh = self._rotation_geometry(src, angle, scale)
        return cv2.warpAffine(
            src, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
            flags=cv2.INTER_LANCZOS4)

    def rotate_xml(self, src, xmin, ymin, xmax, ymax, angle, scale=1.):
        """Map one bounding box of ``src`` into the rotated image.

        The midpoints of the four box edges are transformed with the same
        matrix as the image, and the bounding rectangle of those four points
        is returned.

        Returns:
            ``(x, y, w, h)`` as returned by ``cv2.boundingRect``.
        """
        rot_mat, _, _ = self._rotation_geometry(src, angle, scale)
        # midpoints of the top, right, bottom and left edge of the box
        point1 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymin, 1]))
        point2 = np.dot(rot_mat, np.array([xmax, (ymin+ymax)/2, 1]))
        point3 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymax, 1]))
        point4 = np.dot(rot_mat, np.array([xmin, (ymin+ymax)/2, 1]))
        concat = np.vstack((point1, point2, point3, point4))
        # round to the nearest pixel: plain truncation turns 99.9999... into 99
        concat = np.rint(concat).astype(np.int32)
        rx, ry, rw, rh = cv2.boundingRect(concat)
        return rx, ry, rw, rh

    def process_img(self, imgs_path, xmls_path, img_save_path, xml_save_path, angle_list):
        """Write rotated copies of every ``.jpg`` image and its xml annotation.

        Args:
            imgs_path: folder with the source images.
            xmls_path: folder with the xml files (same base name as the image).
            img_save_path: output folder for rotated images (created if missing).
            xml_save_path: output folder for rotated annotations (created if missing).
            angle_list: iterable of rotation angles in degrees.

        Outputs are named ``<name>_<angle>.jpg`` / ``<name>_<angle>.xml``.
        Images that cannot be read or have no xml file are skipped.
        """
        os.makedirs(img_save_path, exist_ok=True)
        os.makedirs(xml_save_path, exist_ok=True)
        for angle in angle_list:
            for img_name in os.listdir(imgs_path):
                # split filename and suffix
                n, s = os.path.splitext(img_name)
                # only '.jpg' files are processed
                if s != ".jpg":
                    continue
                img_path = os.path.join(imgs_path, img_name)
                xml_path = os.path.join(xmls_path, n + '.xml')
                img = cv2.imread(img_path)
                if img is None or not os.path.isfile(xml_path):
                    print("[%s] %s is skipped (unreadable image or missing xml)." % (angle, img_name))
                    continue
                rotated_img = self.rotate_image(img, angle)
                out_name = n + "_" + str(angle)
                cv2.imwrite(os.path.join(img_save_path, out_name + ".jpg"), rotated_img)
                tree = ET.parse(xml_path)
                root = tree.getroot()
                # keep <size> in sync with the rotated image: later label
                # conversion normalises the boxes by width/height
                size = root.find('size')
                if size is not None:
                    new_h, new_w = rotated_img.shape[:2]
                    for tag, value in (('width', new_w), ('height', new_h)):
                        node = size.find(tag)
                        if node is not None:
                            node.text = str(value)
                for box in root.iter('bndbox'):
                    xmin = float(box.find('xmin').text)
                    ymin = float(box.find('ymin').text)
                    xmax = float(box.find('xmax').text)
                    ymax = float(box.find('ymax').text)
                    x, y, w, h = self.rotate_xml(img, xmin, ymin, xmax, ymax, angle)
                    # change the coord
                    box.find('xmin').text = str(x)
                    box.find('ymin').text = str(y)
                    box.find('xmax').text = str(x+w)
                    box.find('ymax').text = str(y+h)
                    box.set('updated', 'yes')
                # write into new xml
                tree.write(os.path.join(xml_save_path, out_name + ".xml"))
                print("[%s] %s is processed." % (angle, img_name))


if __name__ == '__main__':
    # Source folders: the images and their xml annotation files.
    imgs_path = 'data/images/'
    xmls_path = 'data/annotations'
    # Destination folders for the rotated images and annotations.
    img_save_path = 'data/images_enhance/'
    xml_save_path = 'data/ann_enhance/'
    # One rotated copy of every image is written per angle (degrees).
    angle_list = [90, 180, 270]
    img_aug = ImgAugemention()
    img_aug.process_img(
        imgs_path=imgs_path,
        xmls_path=xmls_path,
        img_save_path=img_save_path,
        xml_save_path=xml_save_path,
        angle_list=angle_list,
    )

2、图片亮暗度调节以及高斯加噪操作：

import cv2
import numpy as np
import os
import xml.etree.ElementTree as ET
import copy


# 椒盐噪声
def SaltAndPepper(src, percetage=0.9):
    """Return a copy of ``src`` with salt-and-pepper noise added.

    ``int(percetage * height * width)`` random (row, column, channel)
    positions are drawn with replacement and each one is set to 0 (pepper)
    or 255 (salt) with equal probability. ``src`` itself is not modified.

    Args:
        src: image array indexed as ``[row, column, channel]`` with at least
            three channels; only channels 0-2 receive noise.
        percetage: number of noise samples relative to ``height * width``.
    """
    SP_NoiseImg = src.copy()
    height, width = src.shape[0], src.shape[1]
    SP_NoiseNum = int(percetage * height * width)
    if SP_NoiseNum <= 0:
        return SP_NoiseImg
    # randint's upper bound is exclusive, so use the full extent: every row
    # and column (including the last one) can be hit
    rows = np.random.randint(0, height, size=SP_NoiseNum)
    cols = np.random.randint(0, width, size=SP_NoiseNum)
    chans = np.random.randint(0, 3, size=SP_NoiseNum)
    # randint(0, 2) yields 0 or 1; randint(0, 1) would always be 0 (pepper only)
    is_pepper = np.random.randint(0, 2, size=SP_NoiseNum) == 0
    SP_NoiseImg[rows, cols, chans] = np.where(is_pepper, 0, 255)
    return SP_NoiseImg


# 高斯噪声
# def addGaussianNoise(image, percetage):
#     G_Noiseimg = image.copy()
#     w = image.shape[1]
#     h = image.shape[0]
#     G_NoiseNum = int(percetage * image.shape[0] * image.shape[1])
#     for i in range(G_NoiseNum):
#         temp_x = np.random.randint(0, h)
#         temp_y = np.random.randint(0, w)
#         G_Noiseimg[temp_x][temp_y][np.random.randint(3)] = np.random.randn(1)[0]
#     return G_Noiseimg


def gasuss_noise(image, mean=0, var=0.01):
    """Return ``image`` with additive Gaussian noise as a ``uint8`` array.

    The image is scaled to [0, 1] by dividing by 255, noise drawn from
    ``N(mean, var)`` is added per element, the result is clipped to [0, 1]
    and scaled back to [0, 255].

    Args:
        image: array of pixel values in the 0-255 range.
        mean: mean of the noise (in the normalised [0, 1] scale).
        var: variance of the noise (in the normalised [0, 1] scale).
    """
    normalized = np.array(image / 255, dtype=float)
    noise = np.random.normal(mean, var ** 0.5, normalized.shape)
    noisy = np.clip(normalized + noise, 0, 1)
    # round instead of truncating: x / 255 * 255 can land just below x, and
    # a plain uint8 cast would then darken the pixel by one
    return np.uint8(np.rint(noisy * 255))


# 昏暗
def darker(image, percetage=0.8):
    """Return a darkened copy of ``image``.

    Every pixel value is multiplied by ``percetage`` and truncated to an
    integer; results are clipped to [0, 255]. ``image`` is not modified.

    Args:
        image: 2-D (grayscale) or 3-D (``[row, column, channel]``) array;
            assumes 8-bit pixel data such as the arrays returned by
            ``cv2.imread``. For 3-D input only channels 0-2 are scaled, so a
            fourth (alpha) channel is copied unchanged.
        percetage: brightness factor, below 1 to darken.
    """
    image_copy = image.copy()
    # view on the values to scale; writing through it updates image_copy
    target = image_copy if image_copy.ndim == 2 else image_copy[..., :3]
    scaled = target.astype(np.float64) * percetage
    # clip so that an out-of-range factor cannot overflow the pixel type
    target[...] = np.clip(scaled, 0, 255).astype(image_copy.dtype)
    return image_copy


# 亮度增加
def brighter(image, percetage=1.5):
    """Return a brightened copy of ``image``.

    Every pixel value is multiplied by ``percetage``, truncated to an
    integer and clipped to [0, 255]. ``image`` is not modified.

    Args:
        image: 2-D (grayscale) or 3-D (``[row, column, channel]``) array;
            assumes 8-bit pixel data such as the arrays returned by
            ``cv2.imread``. For 3-D input only channels 0-2 are scaled, so a
            fourth (alpha) channel is copied unchanged.
        percetage: brightness factor, above 1 to brighten.
    """
    image_copy = image.copy()
    # view on the values to scale; writing through it updates image_copy
    target = image_copy if image_copy.ndim == 2 else image_copy[..., :3]
    scaled = target.astype(np.float64) * percetage
    # saturate at 255 instead of wrapping around
    target[...] = np.clip(scaled, 0, 255).astype(image_copy.dtype)
    return image_copy


# 旋转
def rotate(image, angle, center=None, scale=1.0):
    """Rotate ``image`` by ``angle`` degrees, keeping the original canvas size.

    Args:
        image: image array; ``shape[:2]`` is ``(height, width)``.
        angle: rotation angle in degrees, passed to ``cv2.getRotationMatrix2D``.
        center: ``(x, y)`` rotation centre; the image centre when ``None``.
        scale: isotropic scale factor applied with the rotation.
    """
    height, width = image.shape[:2]
    # fall back to the middle of the image when no pivot is given
    pivot = (width / 2, height / 2) if center is None else center
    matrix = cv2.getRotationMatrix2D(pivot, angle, scale)
    return cv2.warpAffine(image, matrix, (width, height))


# 翻转
def flip(image):
    """Return ``image`` mirrored left-to-right (via ``np.fliplr``)."""
    return np.fliplr(image)


# 图片文件夹路径
# Input folders: the images and their xml annotation files.
file_dir = 'data/images/'
xmls_path = 'data/annotations/'
# Output folders for the augmented images and the matching annotations.
save_dir = 'data/new_images_enhance/'
xml_save_path = 'data/new_ann_enhance/'

os.makedirs(save_dir, exist_ok=True)
os.makedirs(xml_save_path, exist_ok=True)

# (file-name suffix, augmentation) pairs. None of these move pixels around,
# so the annotation file is copied unchanged for each of them.
augmentations = (
    ('_darker', darker),
    ('_brighter', brighter),
    ('_gasuss_noise', gasuss_noise),
    ('_SaltAndPepper', SaltAndPepper),
)

for file in os.listdir(file_dir):
    # base name without extension; splitext keeps dots inside the name
    line = os.path.splitext(file)[0]
    img_path = os.path.join(file_dir, file)
    xml_path = os.path.join(xmls_path, line + '.xml')
    img = cv2.imread(img_path)
    # cv2.imread returns None for files it cannot decode
    if img is None or not os.path.isfile(xml_path):
        print("%s is skipped (unreadable image or missing xml)." % file)
        continue
    tree = ET.parse(xml_path)

    for suffix, augment in augmentations:
        # write the result of *this* augmentation under its own name
        cv2.imwrite(os.path.join(save_dir, line + suffix + '.jpg'), augment(img))
        tree.write(os.path.join(xml_save_path, line + suffix + '.xml'))

这其中有部分地方需要用到jpg格式的图片，所以自己还找了一个把png转换为jpg的代码：

import os
from PIL import Image

dirname_read="png格式文件夹路径/"   # folder containing the png files
dirname_write="jpg格式输出路径/"    # folder that receives the jpg files
os.makedirs(dirname_write, exist_ok=True)
names=os.listdir(dirname_read)
count=0
for name in names:
    stem, ext = os.path.splitext(name)
    # check the extension before opening, so other files are never touched
    if ext.lower() != ".png":
        continue
    with Image.open(os.path.join(dirname_read, name)) as img:
        # convert() works for every png mode (RGBA, RGB, P, L, ...);
        # unpacking img.split() into r, g, b, a only works for RGBA
        rgb_img = img.convert("RGB")
    to_save_path = os.path.join(dirname_write, stem + ".jpg")
    rgb_img.save(to_save_path)
    count+=1
    print(to_save_path, "------conut：",count)

参考的是这位大神的帖子：
https://blog.csdn.net/weixin_44500897/article/details/88568279

数据做好以后就是生成训练集和测试集以及格式的转换了

# coding:utf-8

import os
import random
import argparse
from os import getcwd

parser = argparse.ArgumentParser()
# Alternative arguments for a VOC style layout (xml files under Annotations,
# split lists under ImageSets/Main):
# parser.add_argument('--xml_path', default='Annotations/4125', type=str, help='input xml label path')
# parser.add_argument('--txt_path', default='img_lab/ImageSets/Main-4125', type=str, help='output txt label path')

# Folder whose file names define the samples (one file per image).
parser.add_argument('--labels_path', default='./labels/', type=str, help='input xml label path')
# Folder that receives train.txt / valid.txt.
parser.add_argument('--txt_path', default='./Main/', type=str, help='output txt label path')

opt = parser.parse_args()

# Share of all samples that is used at all, and the share of those that
# goes into the training list (the rest goes into the validation list).
trainval_percent = 1.0
train_percent = 0.8
xmlfilepath = opt.labels_path
txtsavepath = opt.txt_path
total_xml = os.listdir(xmlfilepath)
os.makedirs(txtsavepath, exist_ok=True)

num = len(total_xml)
list_index = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(list_index, tv)
train = random.sample(trainval, tr)
# sets give O(1) membership tests inside the loop below
trainval_set = set(trainval)
train_set = set(train)

with open(os.path.join(txtsavepath, 'train.txt'), 'w') as file_train, \
        open(os.path.join(txtsavepath, 'valid.txt'), 'w') as file_val:
    for i in list_index:
        if i not in trainval_set:
            continue
        # one image path per line; adjust the prefix to your own layout.
        # splitext drops the extension whatever its length is.
        name = 'data/custom/images/' + os.path.splitext(total_xml[i])[0] + '.jpg' + '\n'
        if i in train_set:
            file_train.write(name)
        else:
            file_val.write(name)

这里会生成一个train.txt文件以及一个valid.txt文件，可以把train.txt理解为训练目录，在训练时先看这个目录，然后去找对应的图片和txt文件。(检查最后一行是否有额外空白行！！文件末尾只能留一行空白行，留两行空白行会报错)

然后是将xml转换为txt格式：

import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join

# Names of the dataset splits; not referenced by the rest of this script as shown.
sets = ['train', 'valid']
# Class names: the class id written to a label file is the index of the
# object's name in this list; objects with other names are skipped.
classes = ['hollow', 'gap', 'nosmall', 'nosphere']  ## replace with your own class names


# 原样保留。size为图片大小
# 将ROI的坐标转换为yolo需要的坐标
# size是图片的w和h
# box里保存的是ROI的坐标（x，y的最大值和最小值）
# 返回值为ROI中心点相对于图片大小的比例坐标，和ROI的w、h相对于图片大小的比例
def convert(size, box):
    """Convert a pixel bounding box to normalised centre/size form.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the box in pixels.

    Returns:
        ``(x, y, w, h)``: the box centre (shifted by -1 pixel on both axes)
        and the box width/height, each divided by the image width or height.
    """
    inv_width = 1. / (size[0])
    inv_height = 1. / (size[1])
    center_x = (box[0] + box[1]) / 2.0 - 1
    center_y = (box[2] + box[3]) / 2.0 - 1
    box_width = box[1] - box[0]
    box_height = box[3] - box[2]
    return (
        center_x * inv_width,
        center_y * inv_height,
        box_width * inv_width,
        box_height * inv_height,
    )


def convert_annotation(image_add,
                       xml_dir='/home/ubuntu/mydata/生成labels/annotations/',
                       label_dir='/home/ubuntu/mydata/生成labels/labels/'):
    """Write the YOLO label file for one image from its xml annotation.

    Reads ``<xml_dir>/<name>.xml`` and writes ``<label_dir>/<name>.txt`` with
    one ``"<class id> <x> <y> <w> <h>"`` line per object. Objects whose name
    is not in ``classes`` or whose ``difficult`` flag is 1 are skipped. When
    the xml has no usable ``size`` element the label file is left empty.

    Args:
        image_add: image path or file name (e.g. a line of train.txt);
            surrounding whitespace, directories and the extension are ignored.
        xml_dir: folder with the xml annotations; change to your own input folder.
        label_dir: folder for the generated txt labels; change to your own
            output folder.

    Raises:
        FileNotFoundError: if the xml file does not exist.
        ZeroDivisionError: propagated from ``convert`` when the xml declares
            a width or height of 0.
    """
    # strip the line break left over from reading a list file, then reduce
    # the path to the bare file name without extension
    image_add = os.path.basename(image_add.strip())
    image_add = os.path.splitext(image_add)[0]

    xml_path = os.path.join(xml_dir, image_add + '.xml')
    label_path = os.path.join(label_dir, image_add + '.txt')

    # passing the path lets ElementTree open the file itself and honour the
    # encoding declared in the xml
    root = ET.parse(xml_path).getroot()
    size = root.find('size')

    with open(label_path, 'w') as out_file:
        # compare with None explicitly: truth-testing an Element checks
        # whether it has children and is deprecated
        if size is None or size.find('width') is None or size.find('height') is None:
            return

        # occasionally an annotation has width or height set to 0 by mistake
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        # iter() walks the tree recursively, so every <object> is visited
        for obj in root.iter('object'):
            difficult_node = obj.find('difficult')
            # treat a missing or empty <difficult> tag as "not difficult"
            if difficult_node is not None and difficult_node.text:
                difficult = int(difficult_node.text)
            else:
                difficult = 0
            cls = obj.find('name').text
            if cls not in classes or difficult == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # b is (xmin, xmax, ymin, ymax) in pixels
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')


# List of training images, one path per line; change to your own list file.
# NOTE(review): only train.txt is read here, so images listed in valid.txt
# get no label file from this loop - confirm whether that is intended.
with open('/home/ubuntu/mydata/生成labels/Main/train.txt') as image_adds:
    for image_add in image_adds:
        image_add = image_add.strip()
        # a blank line would otherwise be turned into a lookup of ".xml"
        if not image_add:
            continue
        convert_annotation(image_add)

最后数据处理完就是修改网络里面一些配置文件以及路径，因为不同版本文件不太一样，总的来说修改以下几点：
1、类别数，train.txt和valid.txt以及对应的路径信息
2、在classes.names里面修改类别名字（也是只留一个空白行，不留空白行最后一个类别无法读入！）
3、修改网络参数里面filters的值，一共有三处，可以ctrl+F查找YOLO关键词，把每个YOLO层上一个卷积层的filters改为3*（5+classes的数量）
4、无论是train.py test.py以及detect.py都需要修改def main开始前面那些读取配置文件的路径，同时看好train.py里面训练好保存的权重文件在哪里。

最后把对应的数据放入指定文件中就可以开始训练了。

bandieyanzhi

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
自己训练YOLOv3数据

实习期间做了一个用YOLOv3检测金珠缺损的工程，缺损类别只有四类，训练数据不多，用到了数据增强以及数据格式转化的代码。首先用imglabel生成数据的xml文件以及对应图片的jpg或者png格式（是别的同事帮忙标的，这个我就没做过了）因为检测的是比较贵重的首饰，所以用于训练的样本比较少，用了两个python数据拓展的代码：1、图片旋转`import cv2import mathimport numpy as npimport osimport pdbimport xml.etree.El
复制链接

扫一扫