yolov5训练前准备工作（采样、制作数据集、数据集增强、数据集预处理）

萧易风船长

已于 2022-04-20 17:47:07 修改

阅读量7.7k

点赞数 6

分类专栏：小可可的深度学习小可可的OpenCV之旅小可可的机器人之旅文章标签： python opencv yolo yolov5 图像识别

于 2022-04-20 12:59:06 首次发布

本文链接：https://blog.csdn.net/qq_35975855/article/details/124292250

版权

小可可的机器人之旅同时被 3 个专栏收录

19 篇文章 1 订阅

订阅专栏

小可可的深度学习

5 篇文章 1 订阅

订阅专栏

小可可的OpenCV之旅

3 篇文章 0 订阅

订阅专栏

本文详细介绍了如何通过收集图片、视频转换、批量重命名、使用LabelImg标注、数据增强、XML转TXT等步骤，为Yolov5项目准备训练数据的过程，包括摄像头采集、视频转图片、XML文件操作和数据集划分等关键步骤。

摘要由CSDN通过智能技术生成

写在前面

训练数据可以有多种输入方式，本文提到了其中一种。使用的时候注意工作路径。

使用方法：

收集图片，或使用video_2_jpg.py采样视频，（用cam_video.py拍视频，用get_img.py拍照片）
批量重命名图片，最好都是数字
把图片放在全英文路径下，开始用LabelImg标注，生成xml文件
把标注图片和文件分别放在images、annotations两个文件夹
用data_agumentation.py进行数据增强，同时生成图片和xml
用png_to_jpg.py把可能有的png转化为jpg
把图片放在yolov5s-master文件夹的datasets/images文件夹下，把标注放在datasets/annotations下，用split_train_val.py划分训练集和验证集
用voc_label.py把datasets/annotations文件夹下的xml转换为datasets/labels文件夹下的txt，并把划分数据集时输出的文件名列表转化为图片路径列表，这些路径列表会直接输入到yolo；yolo应该是默认到图片目录的父目录下的labels文件夹中查找标注
修改模型yaml文件里的类别数量和名称
修改data的yaml文件，大致如下

path: datasets  # dataset root dir
train: train.txt  # train images (relative to 'path') 
val: val.txt  # val images (relative to 'path')
nc: 19  # number of classes
names: [自己的类别名]

代码

下面是相应文件的代码
get_img.py

# coding:utf-8
import cv2
import numpy as np
import time
import os

# Camera index: 0 selects the built-in camera; use 1, 2, ... for other cameras.
cap = cv2.VideoCapture(1)
# Camera intrinsics and distortion coefficients used to undistort every frame.
cameraMatrix =np.matrix([[804.4703,-4.7160,404.5110],[0,799.1279,351.8036],[0,0,1]])
distCoeffs = np.matrix([[-0.5834],[0.7615],[0.0026],[0.0107],[0]])
R = np.identity(3)

# Saved pictures go here; cv2.imwrite does not create missing directories.
os.makedirs("new_data", exist_ok=True)

# The undistortion maps depend only on the calibration and the frame size,
# so they are built once from the first frame and reused afterwards.
map1 = map2 = None

try:
    while True:
        # Grab one frame; stop when the camera delivers nothing.
        success, img = cap.read()
        if not success or img is None:
            print("No frame")
            break

        if map1 is None:
            # img.shape is (rows, cols, ...) while OpenCV sizes are (width, height).
            frame_height, frame_width = img.shape[:2]
            img_size = (frame_width, frame_height)
            newCameraMatrix, _ = cv2.getOptimalNewCameraMatrix(cameraMatrix, distCoeffs, img_size, 1, img_size, 0)
            map1, map2 = cv2.initUndistortRectifyMap(cameraMatrix, distCoeffs, R, newCameraMatrix, img_size, cv2.CV_16SC2)

        rectified_img = cv2.remap(img, map1, map2, cv2.INTER_LINEAR)
        # Show the undistorted live view.
        cv2.imshow('----------please enter "s" to take a picture----------', rectified_img)
        # Poll the keyboard for 1 ms so the preview keeps refreshing.
        k = cv2.waitKey(1) & 0xFF
        if k == 27:
            # ESC (ASCII 27) leaves the capture loop.
            break
        elif k == ord("s"):
            # "s" saves the current frame under a millisecond timestamp and keeps running.
            timestamp = int(round(time.time() * 1000))
            img_path = f"new_data/{timestamp}.jpg"
            if not cv2.imwrite(img_path, rectified_img):
                print("failed to write", img_path)
finally:
    # Always free the camera and close the preview window.
    cap.release()
    cv2.destroyAllWindows()

cam_video.py


# -*- coding: UTF-8 -*-
import cv2
import os
import time

cap = cv2.VideoCapture(0)
out = None
try:
    if not cap.isOpened():
        print("Cannot open camera")
    else:
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        size = (int(width), int(height))
        # Other codes that may be used here: DIVX, XVID, MJPG, X264, WMV1, WMV2.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        path = os.getcwd()
        os.makedirs(path, exist_ok=True)

        # The recording is named after the start time; os.path.join keeps the
        # path valid on every platform instead of hard-coding a backslash.
        video_file = os.path.join(path, time.strftime(r"%Y-%m-%d_%H-%M-%S", time.localtime()) + '.mp4')
        out = cv2.VideoWriter(video_file, fourcc, 24.0, size)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("No frame")
                break
            cv2.imshow('frame', frame)
            out.write(frame)
            if cv2.waitKey(1) == ord('q'):  # press q to stop recording
                break
finally:
    # Release the camera and finalize the video file even if an error occurred.
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()

video_2_jpg.py

# -*- coding: UTF-8 -*-
import cv2
import os

filepath = r'./cam_video/'  # directory that holds the source videos
pathDir = os.listdir(filepath)

# All sampled frames are written into one shared folder.
save_path = './cam_img_data'
os.makedirs(save_path, exist_ok=True)

cnt = 1        # running frame counter, shared across all videos
i = 4000       # running image number; the next saved image is i + 1
interval = 5   # keep one frame out of every `interval` frames

for Dir in pathDir:
    # Only .mp4 files are sampled.
    if not Dir.endswith('.mp4'):
        continue
    video_path = filepath + Dir
    print(video_path)
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print('fail to open')
        video.release()
        continue
    print('open successfully')

    while True:
        rval, frame = video.read()
        # Stop at the end of the stream; a failed read yields no frame to flip or save.
        if not rval or frame is None:
            break
        if cnt % interval == 0:
            i += 1
            # Flip vertically because the author's camera is mounted upside down.
            frame = cv2.flip(frame, 0)
            out_file = save_path + '/01{}.jpg'.format(i)
            if not cv2.imwrite(out_file, frame):
                print('fail to write', out_file)
        cnt += 1
    video.release()
    print('write successfully')

data_agumentation.py

# -*- coding: utf-8 -*- 
import xml.etree.ElementTree as ET
import os
import numpy as np
from PIL import Image
import shutil
import imgaug as ia
from imgaug import augmenters as iaa
#【注意】文件命名为数字，xml文件别带中文,用labelimg标注的时候图片路径放在没有中文的地方
ia.seed(42)

def read_xml_annotation(root, image_id):
    """Read every bounding box from an annotation XML file.

    Args:
        root: Directory that contains the XML file.
        image_id: File name of the XML file inside ``root`` (including the
            ``.xml`` extension).

    Returns:
        A list of ``[xmin, ymin, xmax, ymax]`` integer lists, one per
        ``<object>`` element, in document order. The list is empty when the
        file has no ``<object>`` element.
    """
    # Passing the path lets ElementTree open and close the file itself.
    tree = ET.parse(os.path.join(root, image_id))
    bndboxlist = []

    for obj in tree.getroot().findall('object'):
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        xmax = int(bndbox.find('xmax').text)
        ymin = int(bndbox.find('ymin').text)
        ymax = int(bndbox.find('ymax').text)
        bndboxlist.append([xmin, ymin, xmax, ymax])

    return bndboxlist


# (506.0000, 330.0000, 528.0000, 348.0000) -> (520.4747, 381.5080, 540.5596, 398.6603)
def change_xml_annotation(root, image_id, new_target):
    """Overwrite the first bounding box of an annotation and save a copy.

    Reads ``<root>/<image_id>.xml``, replaces the coordinates of the first
    ``<object>``'s ``<bndbox>`` and writes the result to
    ``<root>/<image_id zero-padded to six digits>.xml``.

    Args:
        root: Directory of the annotation files (a file-system path, not an
            XML root element).
        image_id: Numeric stem of the annotation file.
        new_target: Sequence ``(xmin, ymin, xmax, ymax)`` with the new box.
    """
    new_xmin, new_ymin, new_xmax, new_ymax = new_target[:4]

    tree = ET.parse(os.path.join(root, str(image_id) + '.xml'))
    bndbox = tree.getroot().find('object').find('bndbox')
    updates = (('xmin', new_xmin), ('ymin', new_ymin),
               ('xmax', new_xmax), ('ymax', new_ymax))
    for tag, value in updates:
        bndbox.find(tag).text = str(value)

    # The original formatted the builtin `id` with "%06d", which always raised
    # TypeError; the zero-padded image id is used for the output name instead.
    tree.write(os.path.join(root, "%06d" % int(image_id) + '.xml'))


def change_xml_list_annotation(root, image_id, new_target, saveroot, id):
    """Write a copy of an annotation with all bounding boxes replaced.

    Reads ``<root>/<image_id>.xml``, sets ``<filename>`` to the six-digit
    zero-padded ``id`` plus ``.jpg``, replaces the box of the n-th
    ``<object>`` with ``new_target[n]`` and writes the result to
    ``<saveroot>/<id zero-padded to six digits>.xml``.

    Args:
        root: Directory of the source annotation (a file-system path).
        image_id: Stem of the source annotation file.
        new_target: List of ``[xmin, ymin, xmax, ymax]`` entries, one per
            ``<object>`` in document order.
        saveroot: Directory that receives the new annotation file.
        id: Number of the augmented sample; must be convertible with ``int``.

    Raises:
        IndexError: If ``new_target`` has fewer entries than the annotation
            has ``<object>`` elements.
    """
    new_stem = "%06d" % int(id)
    # Passing the path lets ElementTree open and close the file itself.
    tree = ET.parse(os.path.join(root, str(image_id) + '.xml'))

    filename_node = tree.find('filename')
    if filename_node is not None:
        filename_node.text = new_stem + '.jpg'

    objects = tree.getroot().findall('object')
    if len(new_target) < len(objects):
        raise IndexError('new_target has %d boxes but the annotation has %d objects'
                         % (len(new_target), len(objects)))

    for obj, box in zip(objects, new_target):
        bndbox = obj.find('bndbox')
        bndbox.find('xmin').text = str(box[0])
        bndbox.find('ymin').text = str(box[1])
        bndbox.find('xmax').text = str(box[2])
        bndbox.find('ymax').text = str(box[3])

    tree.write(os.path.join(saveroot, new_stem + '.xml'))


def mkdir(path):
    """Create ``path`` (including parents) unless it already exists.

    Surrounding whitespace and trailing ``/`` characters are removed from
    ``path`` before it is checked. A status message is printed either way.

    Returns:
        True when the directory was created, False when the path already
        existed.
    """
    target = path.strip().rstrip("/")

    # Nothing to do when the path is already there.
    if os.path.exists(target):
        print(target + 'already exits ')
        return False

    os.makedirs(target)
    print(target + 'is successfully created!')
    return True


if __name__ == "__main__":

    IMG_DIR = "new_data/image"
    XML_DIR = "new_data/annotation"

    AUG_XML_DIR = "argu_new_data/annotations"  # output folder for the augmented XML files
    AUG_IMG_DIR = "argu_new_data/images"  # output folder for the augmented images

    # Start from empty output folders on every run.
    for out_dir in (AUG_XML_DIR, AUG_IMG_DIR):
        try:
            shutil.rmtree(out_dir)
        except FileNotFoundError:
            pass
        mkdir(out_dir)

    AUGLOOP = 7  # number of augmented copies generated per image

    # Augmentation pipeline applied to both the image and its boxes.
    seq = iaa.Sequential([
        iaa.Flipud(0.5),  # vertical flip with probability 0.5
        iaa.Fliplr(0.5),  # horizontal mirror with probability 0.5
        iaa.Multiply((1.2, 1.5)),  # brighten; does not move bounding boxes
        iaa.GaussianBlur(sigma=(0, 2.0)),
        iaa.Affine(
            translate_px={"x": 15, "y": 15},
            scale=(0.8, 0.95),
            rotate=(-30, 30)
        )  # shift by 15 px on both axes, scale to 80-95 %, rotate +-30 deg; moves boxes
    ])

    for root, sub_folders, files in os.walk(IMG_DIR):

        for name in files:
            # File stems must be numeric: they are used to number the outputs.
            stem, ext = os.path.splitext(name)
            img_path = os.path.join(root, name)
            xml_path = os.path.join(XML_DIR, stem + '.xml')
            if not os.path.exists(xml_path):
                print('missing annotation', name)
                continue

            bndbox = read_xml_annotation(XML_DIR, stem + '.xml')
            # Keep the untouched sample next to its augmented copies.
            shutil.copy(xml_path, AUG_XML_DIR)
            shutil.copy(img_path, AUG_IMG_DIR)

            # The pixels do not change between epochs, so load the image once.
            with Image.open(img_path) as pil_img:
                img = np.asarray(pil_img)
            img_h, img_w = img.shape[0], img.shape[1]
            bbs = ia.BoundingBoxesOnImage(
                [ia.BoundingBox(x1=b[0], y1=b[1], x2=b[2], y2=b[3]) for b in bndbox],
                shape=img.shape)

            for epoch in range(AUGLOOP):
                # A deterministic copy applies the same transform to boxes and image.
                seq_det = seq.to_deterministic()

                aug_boxes = []
                if bndbox:
                    aug_boxes = seq_det.augment_bounding_boxes([bbs])[0].bounding_boxes

                # new_bndbox_list: [[x1, y1, x2, y2], ...], clamped into the image.
                new_bndbox_list = []
                for box in aug_boxes:
                    n_x1 = int(max(1, min(img_w, box.x1)))
                    n_y1 = int(max(1, min(img_h, box.y1)))
                    n_x2 = int(max(1, min(img_w, box.x2)))
                    n_y2 = int(max(1, min(img_h, box.y2)))
                    if n_x1 == 1 and n_x1 == n_x2:
                        n_x2 += 1
                    if n_y1 == 1 and n_y2 == n_y1:
                        n_y2 += 1
                    if n_x1 >= n_x2 or n_y1 >= n_y2:
                        print('error', name)
                    # Degenerate boxes are still stored so that the list stays
                    # aligned with the <object> order of the XML file.
                    new_bndbox_list.append([n_x1, n_y1, n_x2, n_y2])

                # NOTE(review): ids can collide when the numeric stems span 1000
                # or more; confirm the numbering scheme for larger datasets.
                new_id = len(files) + int(stem) + epoch * 1000

                # Save the augmented image. The original drew the boxes with
                # thickness 0 first, which presumably left the pixels unchanged.
                image_aug = seq_det.augment_images([img])[0]
                Image.fromarray(image_aug).save(
                    os.path.join(AUG_IMG_DIR, "%06d" % new_id + ext))

                # Save the matching XML and report the name that was written.
                change_xml_list_annotation(XML_DIR, stem, new_bndbox_list, AUG_XML_DIR, new_id)
                print("%06d" % new_id + '.xml')

png_to_jpg.py

from PIL import Image
import os
import shutil
if __name__ == '__main__':
    path = './images'
    save_path = './jpg_images'
    os.makedirs(save_path, exist_ok=True)

    for name in os.listdir(path):
        filepath = os.path.join(path, name)
        # Sub-directories cannot be converted or copied as single files.
        if not os.path.isfile(filepath):
            continue

        # splitext handles extensions of any length (".jpeg", ".PNG", ...).
        stem, ext = os.path.splitext(name)
        ext = ext.lower()
        if ext == '.png':
            # JPEG has no alpha channel, so convert to RGB before saving.
            with Image.open(filepath) as img:
                img.convert('RGB').save(os.path.join(save_path, stem + '.jpg'), quality=95)
        elif ext in ('.jpg', '.jpeg'):
            shutil.copy(filepath, os.path.join(save_path, stem + '.jpg'))
        else:
            # Other files are copied under their own name instead of being
            # mislabeled as .jpg.
            shutil.copy(filepath, os.path.join(save_path, name))

split_train_val.py

# coding:utf-8
import os
import random
import argparse

parser = argparse.ArgumentParser()
# Folder with the XML annotations; adjust to your own data.
parser.add_argument('--xml_path', default='datasets/annotations', type=str, help='input xml label path')
# Folder that receives the split lists (trainval/train/val/test .txt).
parser.add_argument('--txt_path', default='datasets/ImageSets/Main', type=str, help='output txt label path')
opt = parser.parse_args()

trainval_percent = 1.0  # share of all samples used for train + val (the rest is test)
train_percent = 0.9     # share of train + val used for training
xmlfilepath = opt.xml_path
txtsavepath = opt.txt_path
# Only annotation files take part in the split; sorting makes the index order stable.
total_xml = sorted(f for f in os.listdir(xmlfilepath) if f.lower().endswith('.xml'))
os.makedirs(txtsavepath, exist_ok=True)

num = len(total_xml)
list_index = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval_list = random.sample(list_index, tv)
# Sets give constant-time membership tests in the loop below.
train = set(random.sample(trainval_list, tr))
trainval = set(trainval_list)

with open(txtsavepath + '/trainval.txt', 'w') as file_trainval, \
        open(txtsavepath + '/test.txt', 'w') as file_test, \
        open(txtsavepath + '/train.txt', 'w') as file_train, \
        open(txtsavepath + '/val.txt', 'w') as file_val:
    for i in list_index:
        # Each list holds one file stem (name without extension) per line.
        name = os.path.splitext(total_xml[i])[0] + '\n'
        if i in trainval:
            file_trainval.write(name)
            if i in train:
                file_train.write(name)
            else:
                file_val.write(name)
        else:
            file_test.write(name)

voc_label.py

# -*- coding: utf-8 -*-

import xml.etree.ElementTree as ET
import os
from os import getcwd
# Run from the yolov5-master directory. Converts XML to txt and works together
# with split_train_val.py (train/val split); all images must be .jpg.
# Prefer relative paths: absolute ones may introduce escape characters.
sets = [
    'train',
    'val',
    'test',
]
classes = [
    'ad', 'ad1', 'ad2', 'bskl', 'dp', 'dp1', 'hn',
    'hsfk', 'jdb', 'jdb1', 'lsfk', 'mf', 'mf1',
    'qdpj', 'wlj', 'xb', 'xhpj', 'xhpj1', 'xhpj2',
]
#abs_path = os.getcwd()
#print(abs_path)

def convert(size, box):
    """Turn a pixel box into normalized center/size values.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        Tuple ``(x, y, w, h)``: the box center (shifted by one pixel) and the
        box extent, each multiplied by the reciprocal of the image width or
        height.
    """
    inv_w = 1. / (size[0])
    inv_h = 1. / (size[1])
    x_lo, x_hi, y_lo, y_hi = box[0], box[1], box[2], box[3]

    center_x = ((x_lo + x_hi) / 2.0 - 1) * inv_w
    center_y = ((y_lo + y_hi) / 2.0 - 1) * inv_h
    span_x = (x_hi - x_lo) * inv_w
    span_y = (y_hi - y_lo) * inv_h
    return center_x, center_y, span_x, span_y

def convert_annotation(image_id):
    """Convert one XML annotation into a YOLO label file.

    Reads ``datasets/annotations/<image_id>.xml`` and writes
    ``datasets/labels/<image_id>.txt`` with one line
    ``<class index> <x> <y> <w> <h>`` per object whose name is listed in
    ``classes``. Objects with an unknown class name or an empty box are
    reported and skipped. Nothing is written when the annotation declares a
    non-positive image size.

    Args:
        image_id: File stem shared by the annotation and the label file.
    """
    # Passing the path lets ElementTree open and close the file itself.
    tree = ET.parse('datasets/annotations/%s.xml' % (image_id))
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    if w <= 0 or h <= 0:
        # convert() divides by the image size, so such a file cannot be used.
        print(image_id, " invalid image size:", w, h)
        return

    lines = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in classes:
            print(image_id, " wrong class name:" + str(cls))
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        # Clamp the box into the image on all four sides.
        x_min = min(max(float(xmlbox.find('xmin').text), 0), w)
        x_max = min(max(float(xmlbox.find('xmax').text), 0), w)
        y_min = min(max(float(xmlbox.find('ymin').text), 0), h)
        y_max = min(max(float(xmlbox.find('ymax').text), 0), h)
        if x_max <= x_min or y_max <= y_min:
            print(image_id, " empty box skipped")
            continue
        bb = convert((w, h), (x_min, x_max, y_min, y_max))
        lines.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

    with open('datasets/labels/%s.txt' % (image_id), 'w') as out_file:
        out_file.writelines(lines)

wd = getcwd()
# The label folder is shared by all sets, so create it once up front.
os.makedirs('datasets/labels/', exist_ok=True)
for image_set in sets:
    # One file stem per entry, as written by split_train_val.py.
    with open('datasets/ImageSets/Main/%s.txt' % (image_set)) as id_file:
        image_ids = id_file.read().strip().split()
    with open('datasets/%s.txt' % (image_set), 'w') as list_file:
        for image_id in image_ids:
            # Image path relative to the working directory, one per line.
            list_file.write('datasets/images/%s.jpg\n' % (image_id))
            # Produce the matching label txt; without this call no labels
            # are generated at all.
            convert_annotation(image_id)