yolov3训练的简单记录

最新推荐文章于 2024-01-15 14:58:13 发布

一个叫kevin的死肥宅

最新推荐文章于 2024-01-15 14:58:13 发布

阅读量312

点赞数

本文链接：https://blog.csdn.net/weixin_43003949/article/details/105063835

版权

准备数据集

数据集主要来源于两部分，第一部分是来自于coco数据集的一部分，通过csdn上找的代码提取出来了几个想要的类转换成xml格式（coco那个包安装起来真的是一部血泪史，那一个包安了一个下午），时间有点久了也就找不到当时的解决办法了。
转换代码如下（来源是csdn）:

from pycocotools.coco import COCO

import skimage.io as io

import matplotlib.pyplot as plt

import pylab, os, cv2, shutil

from lxml import etree, objectify

from tqdm import tqdm

import random

from PIL import Image

# Default matplotlib figure size used by the preview plots.
pylab.rcParams['figure.figsize'] = (8.0, 10.0)

# Root directory from which the image folders and annotation JSON paths are built.
dataDir = '..'

# COCO category names to extract.
CK5cats = ['car', 'truck', 'fire hydrant']

# Output root of the extracted dataset.
# NOTE(review): this value already ends with "/", so the joins below produce
# "E//images" and "E//Annotations"; possibly a drive path such as "E:/" was
# intended - confirm.
CKdir = "E/"

# Destination of the copied images.
CKimg_dir = CKdir + "/" + "images"

# Destination of the generated XML annotation files.
CKanno_dir = CKdir + "/" + "Annotations"


def mkr(dir):
    """Create directory ``dir`` (including parents) unless the path already exists."""
    already_present = os.path.exists(dir)
    if already_present:
        return
    os.makedirs(dir)


def showimg(coco, dataType, img, CK5Ids):
    """Show one COCO image with the selected categories' annotations overlaid.

    Args:
        coco: COCO API object used to look up and draw annotations.
        dataType: Name of the image sub-folder under ``dataDir``.
        img: COCO image record; its ``file_name`` and ``id`` entries are read.
        CK5Ids: Category ids whose annotations should be drawn.
    """
    image_path = '%s/%s/%s' % (dataDir, dataType, img['file_name'])
    pixels = io.imread(image_path)
    plt.imshow(pixels)
    plt.axis('off')

    # Fetch the annotations of this image restricted to the wanted categories.
    ann_ids = coco.getAnnIds(imgIds=img['id'], catIds=CK5Ids, iscrowd=None)
    coco.showAnns(coco.loadAnns(ann_ids))

    plt.show()


def save_annotations(dataType, filename, objs):
    """Copy one source image into the dataset and write its VOC-style XML.

    Images that are not in RGB mode, or that OpenCV cannot decode, are
    reported on stdout and skipped (nothing is copied or written).

    Args:
        dataType: Name of the image sub-folder under ``dataDir``.
        filename: Image file name inside that folder.
        objs: Iterable of ``[name, score, xmin, ymin, xmax, ymax]`` entries;
            index 1 is not written to the XML.
    """
    # splitext handles extensions of any length; slicing off three characters
    # produced a wrong annotation name for e.g. ".jpeg".
    stem = os.path.splitext(filename)[0]
    annopath = CKanno_dir + "/" + stem + ".xml"
    img_path = dataDir + "/" + dataType + "/" + filename
    dst_path = CKimg_dir + "/" + filename

    # The context manager closes the PIL handle on every path.
    with Image.open(img_path) as im:
        is_rgb = im.mode == "RGB"
    if not is_rgb:
        print(filename + " not a RGB image")
        return

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        print(filename + " could not be read by OpenCV")
        return

    shutil.copy(img_path, dst_path)

    E = objectify.ElementMaker(annotate=False)
    anno_tree = E.annotation(
        E.folder('1'),
        E.filename(filename),
        E.source(
            E.database('CKdemo'),
            E.annotation('VOC'),
            E.image('CK')
        ),
        E.size(
            E.width(img.shape[1]),
            E.height(img.shape[0]),
            E.depth(img.shape[2])
        ),
        E.segmented(0)
    )

    # One <object> element per bounding box.
    for obj in objs:
        anno_tree.append(
            E.object(
                E.name(obj[0]),
                E.pose(),
                E.truncated("0"),
                E.difficult(0),
                E.bndbox(
                    E.xmin(obj[2]),
                    E.ymin(obj[3]),
                    E.xmax(obj[4]),
                    E.ymax(obj[5])
                )
            )
        )

    etree.ElementTree(anno_tree).write(annopath, pretty_print=True)


def showbycv(coco, dataType, img, classes, CK5Ids):
    """Collect the wanted boxes of one image, save them, and preview the result.

    Args:
        coco: COCO API object used to look up annotations.
        dataType: Name of the image sub-folder under ``dataDir``.
        img: COCO image record; its ``file_name`` and ``id`` entries are read.
        classes: Mapping from category id to category name.
        CK5Ids: Category ids used to filter the annotations.
    """
    filename = img['file_name']
    canvas = cv2.imread('%s/%s/%s' % (dataDir, dataType, filename))

    ann_ids = coco.getAnnIds(imgIds=img['id'], catIds=CK5Ids, iscrowd=None)

    objs = []
    for ann in coco.loadAnns(ann_ids):
        name = classes[ann['category_id']]
        if name not in CK5cats or 'bbox' not in ann:
            continue

        # The box is stored as [x, y, width, height]; convert to corner form.
        box = ann['bbox']
        xmin = int(box[0])
        ymin = int(box[1])
        xmax = int(box[2] + box[0])
        ymax = int(box[3] + box[1])

        objs.append([name, 1.0, xmin, ymin, xmax, ymax])

        cv2.rectangle(canvas, (xmin, ymin), (xmax, ymax), (255, 0, 0))
        cv2.putText(canvas, name, (xmin, ymin), 3, 1, (0, 0, 255))

    save_annotations(dataType, filename, objs)

    cv2.imshow("img", canvas)
    cv2.waitKey(1)


def catid2name(coco):
    """Return a dict mapping each category id in ``coco.dataset`` to its name."""
    return {entry['id']: entry['name'] for entry in coco.dataset['categories']}


def get_CK5():
    """Extract every image containing a wanted category from both COCO splits.

    Creates the output folders, then for each split and each category in
    ``CK5cats`` hands every matching image to ``showbycv``.
    """
    for out_dir in (CKimg_dir, CKanno_dir):
        mkr(out_dir)

    for dataType in ('train2014', 'val2014'):
        annFile = '{}/annotations_trainval2014/annotations/instances_{}.json'.format(dataDir, dataType)
        coco = COCO(annFile)

        CK5Ids = coco.getCatIds(catNms=CK5cats)
        classes = catid2name(coco)

        for srccat in CK5cats:
            print(dataType + ":" + srccat)

            # Images are looked up per category.
            img_ids = coco.getImgIds(catIds=coco.getCatIds(catNms=[srccat]))

            for img_id in tqdm(img_ids):
                showbycv(coco, dataType, coco.loadImgs(img_id)[0], classes, CK5Ids)


# split train and test for training

def split_traintest(trainratio=0.7, valratio=0.2, testratio=0.1):
    """Shuffle the extracted images and write train/val/test list files.

    Two sets of lists are written under ``CKdir``: full image paths
    (``trainval.txt``, ``test.txt``) and extension-less names under
    ``ImageSets/Main`` (``train``, ``val``, ``trainval``, ``test``).

    Args:
        trainratio: Fraction of the files assigned to the training list.
        valratio: Fraction of the files assigned to the validation list.
        testratio: Accepted for interface compatibility; the test list is
            simply whatever remains after the train and val shares.
    """
    dataset_dir = CKdir
    files = os.listdir(CKimg_dir)
    random.shuffle(files)

    total = len(files)
    trains, vals, tests = [], [], []
    for position, name in enumerate(files):
        if position < trainratio * total:
            trains.append(name)
        elif position < (trainratio + valratio) * total:
            vals.append(name)
        else:
            tests.append(name)
    # All train entries precede all val entries, as in the shuffled order.
    trainvals = trains + vals

    # Lists of full image paths.
    _write_lines(dataset_dir + "/trainval.txt", [CKimg_dir + "/" + name for name in trainvals])
    _write_lines(dataset_dir + "/test.txt", [CKimg_dir + "/" + name for name in tests])

    # Lists of bare image names.
    maindir = dataset_dir + "/" + "ImageSets/Main"
    mkr(maindir)
    for list_name, members in (("train", trains), ("val", vals),
                               ("trainval", trainvals), ("test", tests)):
        # splitext leaves a dot-less name intact; slicing at rfind(".")
        # returned -1 for such names and cut off their last character.
        stems = [os.path.splitext(name)[0] for name in members]
        _write_lines(maindir + "/" + list_name + ".txt", stems)

    print("spliting done")


def _write_lines(path, lines):
    """Write every entry of ``lines`` to ``path``, one per line."""
    with open(path, "w") as f:
        f.writelines(line + "\n" for line in lines)


if __name__ == "__main__":
    # Step 1: extract the selected COCO categories into the image and
    # annotation output folders.
    get_CK5()

    # Step 2: shuffle the extracted images and write the split list files.
    split_traintest()

另一部分来自于自己标注的数据，工具推荐使用labelimg，使用很方便，然后快捷键也很好用，可以直接在github上下载已经打包好的版本。然后由于自己手动标注的数据并不多，每个类只有两百多张，感觉完全无法达到要求（我太懒了），在好心的老哥的提醒下找到了数据增强的代码，然后使用数据增强进行数据扩充，勉强达到了要求。
数据增强代码如下，来源也是csdn,我太菜了，自己写不出来：

import xml.etree.ElementTree as ET
import pickle
import os
from os import getcwd
import numpy as np
from PIL import Image
import shutil
import matplotlib.pyplot as plt

import imgaug as ia
from imgaug import augmenters as iaa


# Fix imgaug's global random seed.
ia.seed(1)


def read_xml_annotation(root, image_id):
    """Read every bounding box from one VOC-style XML annotation file.

    Args:
        root: Directory that contains the annotation file.
        image_id: File name of the annotation, including its extension.

    Returns:
        A list with one ``[xmin, ymin, xmax, ymax]`` list of ints per
        ``<object>`` element; empty when the file has no objects.
    """
    # Passing the path lets ElementTree open and close the file itself,
    # so no handle is left open.
    tree = ET.parse(os.path.join(root, image_id))

    boxes = []
    for obj in tree.getroot().findall('object'):
        bndbox = obj.find('bndbox')
        boxes.append([int(bndbox.find(tag).text)
                      for tag in ('xmin', 'ymin', 'xmax', 'ymax')])
    return boxes


# (506.0000, 330.0000, 528.0000, 348.0000) -> (520.4747, 381.5080, 540.5596, 398.6603)
def change_xml_annotation(root, image_id, new_target):
    """Replace the first object's bounding box in an annotation and save it.

    Only the first ``<object>`` element is modified. The result is written
    into ``root`` under the six-digit zero-padded form of ``image_id`` when it
    is numeric (the naming used by ``change_xml_list_annotation``), otherwise
    under ``image_id`` unchanged.

    Args:
        root: Directory holding the annotation; also the output directory.
        image_id: Annotation name without the ``.xml`` extension.
        new_target: Sequence ``(xmin, ymin, xmax, ymax)`` of new coordinates.
    """
    values = (new_target[0], new_target[1], new_target[2], new_target[3])

    # Passing the path lets ElementTree close the file after parsing.
    tree = ET.parse(os.path.join(root, str(image_id) + '.xml'))
    bndbox = tree.getroot().find('object').find('bndbox')
    for tag, value in zip(('xmin', 'ymin', 'xmax', 'ymax'), values):
        bndbox.find(tag).text = str(value)

    # The previous code formatted "%06d" with a string built from the builtin
    # ``id`` function, which raised TypeError on every call.
    try:
        out_stem = "%06d" % int(image_id)
    except (TypeError, ValueError):
        out_stem = str(image_id)
    tree.write(os.path.join(root, out_stem + '.xml'))


def change_xml_list_annotation(root, image_id, new_target, saveroot, id):
    """Write a copy of an annotation with new boxes under a new numeric name.

    Args:
        root: Directory holding the source annotation.
        image_id: Source annotation name without the ``.xml`` extension.
        new_target: One ``[xmin, ymin, xmax, ymax]`` entry per ``<object>``
            element, in document order.
        saveroot: Directory that receives the new annotation file.
        id: Number of the new sample; formatted as six zero-padded digits for
            both the ``<filename>`` entry and the output file name.

    Raises:
        IndexError: If ``new_target`` has fewer entries than the file has
            objects.
    """
    # Passing the path lets ElementTree close the file after parsing.
    tree = ET.parse(os.path.join(root, str(image_id) + '.xml'))

    new_stem = "%06d" % int(id)
    tree.find('filename').text = new_stem + '.jpg'

    for index, obj in enumerate(tree.getroot().findall('object')):
        bndbox = obj.find('bndbox')
        target = new_target[index]
        coords = (target[0], target[1], target[2], target[3])
        for tag, value in zip(('xmin', 'ymin', 'xmax', 'ymax'), coords):
            bndbox.find(tag).text = str(value)

    tree.write(os.path.join(saveroot, new_stem + '.xml'))


def mkdir(path):
    """Create ``path`` if it is missing and report the outcome on stdout.

    Surrounding whitespace and trailing backslashes are removed from ``path``
    before it is used.

    Returns:
        True when the directory was created, False when the path already
        existed.
    """
    path = path.strip().rstrip("\\")

    if os.path.exists(path):
        # Nothing to create; only report it.
        print(path + ' 目录已存在')
        return False

    os.makedirs(path)
    print(path + ' 创建成功')
    return True


# Augmentation driver: copies every source image/annotation pair into the
# output folders and then writes AUGLOOP augmented variants of each pair.
if __name__ == "__main__":

    # Source images and their XML annotations.
    IMG_DIR = "E:/data/JPEGImage"
    XML_DIR = "E:/data/Annotations"

    AUG_XML_DIR = "./Annotations"  # output folder for the augmented XML files
    # Start from an empty output folder; a missing folder is not an error.
    try:
        shutil.rmtree(AUG_XML_DIR)
    except FileNotFoundError as e:
        a = 1
    mkdir(AUG_XML_DIR)

    AUG_IMG_DIR = "./JPEGImage"  # output folder for the augmented images
    try:
        shutil.rmtree(AUG_IMG_DIR)
    except FileNotFoundError as e:
        a = 1
    mkdir(AUG_IMG_DIR)

    AUGLOOP = 20  # number of augmented variants produced per image

    # NOTE(review): boxes_img_aug_list only ever grows and is never read;
    # new_bndbox is never used.
    boxes_img_aug_list = []
    new_bndbox = []
    new_bndbox_list = []

    # Augmentation pipeline
    seq = iaa.Sequential([
        iaa.Flipud(0.5),  # vertical flip, argument 0.5
        iaa.Fliplr(0.5),  # horizontal mirror, argument 0.5
        iaa.Multiply((1.2, 1.5)),  # change brightness, doesn't affect BBs
        iaa.GaussianBlur(sigma=(0, 3.0)),  # blur with sigma drawn from 0 to 3.0
        iaa.Affine(
            translate_px={"x": 15, "y": 15},
            scale=(0.8, 0.95),
            rotate=(-30, 30)
        )  # translate 15px on x/y, scale range 0.8-0.95, rotate range -30..30; affects BBs
    ])

    for root, sub_folders, files in os.walk(XML_DIR):

        for name in files:

            # Boxes of the original annotation; the original pair is also
            # copied unchanged into the output folders.
            # NOTE(review): paths are joined with XML_DIR rather than the
            # walked `root`, so files in sub-folders would not resolve - confirm
            # the annotation folder is flat.
            bndbox = read_xml_annotation(XML_DIR, name)
            shutil.copy(os.path.join(XML_DIR, name), AUG_XML_DIR)
            shutil.copy(os.path.join(IMG_DIR, name[:-4] + '.jpg'), AUG_IMG_DIR)

            for epoch in range(AUGLOOP):
                seq_det = seq.to_deterministic()  # keep boxes and image transformed in sync rather than independently
                # Load the image
                img = Image.open(os.path.join(IMG_DIR, name[:-4] + '.jpg'))
                # sp = img.size
                img = np.asarray(img)
                # Augment the bounding-box coordinates, one box at a time
                for i in range(len(bndbox)):
                    bbs = ia.BoundingBoxesOnImage([
                        ia.BoundingBox(x1=bndbox[i][0], y1=bndbox[i][1], x2=bndbox[i][2], y2=bndbox[i][3]),
                    ], shape=img.shape)

                    bbs_aug = seq_det.augment_bounding_boxes([bbs])[0]
                    boxes_img_aug_list.append(bbs_aug)

                    # new_bndbox_list:[[x1,y1,x2,y2],...[],[]]
                    # Clamp each coordinate to the range 1..image width/height.
                    n_x1 = int(max(1, min(img.shape[1], bbs_aug.bounding_boxes[0].x1)))
                    n_y1 = int(max(1, min(img.shape[0], bbs_aug.bounding_boxes[0].y1)))
                    n_x2 = int(max(1, min(img.shape[1], bbs_aug.bounding_boxes[0].x2)))
                    n_y2 = int(max(1, min(img.shape[0], bbs_aug.bounding_boxes[0].y2)))
                    # Widen a box collapsed onto the lower bound by one pixel.
                    if n_x1 == 1 and n_x1 == n_x2:
                        n_x2 += 1
                    if n_y1 == 1 and n_y2 == n_y1:
                        n_y2 += 1
                    # Degenerate boxes are only reported; they are still stored below.
                    if n_x1 >= n_x2 or n_y1 >= n_y2:
                        print('error', name)
                    new_bndbox_list.append([n_x1, n_y1, n_x2, n_y2])
                # Save the augmented image
                image_aug = seq_det.augment_images([img])[0]
                # Output number = file count + original numeric name + epoch * 250.
                # NOTE(review): int(name[:-4]) requires purely numeric file names,
                # and this numbering can collide between files - confirm.
                path = os.path.join(AUG_IMG_DIR,
                                    str("%06d" % (len(files) + int(name[:-4]) + epoch * 250)) + '.jpg')
                # NOTE(review): `bbs` is whatever the box loop left behind (the last
                # un-augmented box); it is undefined or stale when the annotation
                # has no boxes. Presumably thickness=0 is meant to draw nothing -
                # verify against the imgaug version in use.
                image_auged = bbs.draw_on_image(image_aug, thickness=0)
                Image.fromarray(image_auged).save(path)

                # Save the augmented XML
                change_xml_list_annotation(XML_DIR, name[:-4], new_bndbox_list, AUG_XML_DIR,
                                           len(files) + int(name[:-4]) + epoch * 250)
                print(str("%06d" % (len(files) + int(name[:-4]) + epoch * 250)) + '.jpg')
                # Reset the per-variant box list for the next variant.
                new_bndbox_list = []

在准备好数据集后我曾按照比较正规的方式向下推进，即将数据集文件夹格式变成比较规范的voc格式然后通过voc_annotation来划分训练集验证集，具体的方法我是参考的这篇文章
但是在按照上文的一切准备就绪后，放入服务器进行训练的时候，却报了错，大约是index错误，经过查找与询问，最后得知可能是自己标注的一些数据集不符合规范导致报错，这个错误困扰了我两天，很高兴可以得到Dontla老哥的热心帮助，在他的提示下我得知应该把xml格式转换为yolo的txt格式然后进行数据清洗，转换格式的代码如下：

import os

import xml.etree.ElementTree as ET

# Convert every VOC-style XML annotation in ``dirpath`` into a YOLO txt label
# file in ``newdir``: one "class x_center y_center width height" line per
# object, with all four values normalised by the image size.

dirpath = './Annotation/'  # directory holding the source XML files

newdir = './txt/'  # directory that receives the generated txt labels
classes = ['door','brick','garbage','car','truck','fire hydrant','dump','EC']
# Class name -> YOLO class id (its position in ``classes``).
class_ids = {name: index for index, name in enumerate(classes)}

if not os.path.exists(newdir):
    os.makedirs(newdir)

for fp in os.listdir(dirpath):

    root = ET.parse(os.path.join(dirpath, fp)).getroot()

    # Children are looked up by tag name so the result does not depend on the
    # order in which the annotation tool wrote them.
    sz = root.find('size')
    width = float(sz.find('width').text)
    height = float(sz.find('height').text)

    filename = root.find('filename').text

    if width == 0 or height == 0:
        # Normalisation is impossible; previously the error was caught but
        # stale or undefined values were still written afterwards.
        print(filename, '的 width有问题')
        continue

    lines = []
    for child in root.findall('object'):  # every box in the image
        label = child.find('name').text
        if label not in class_ids:
            # Previously an unknown label silently reused the class id of the
            # preceding object (or raised NameError on the first object).
            print(filename, 'skipped object with unknown label:', label)
            continue

        sub = child.find('bndbox')
        xmin = float(sub.find('xmin').text)
        ymin = float(sub.find('ymin').text)
        xmax = float(sub.find('xmax').text)
        ymax = float(sub.find('ymax').text)

        # YOLO format: box centre and size, each relative to the image size.
        x_center = (xmin + xmax) / (2 * width)
        y_center = (ymin + ymax) / (2 * height)
        w = (xmax - xmin) / width
        h = (ymax - ymin) / height

        lines.append(' '.join([str(class_ids[label]), str(x_center), str(y_center), str(w), str(h) + '\n']))

    if lines:
        # Append mode is kept from the original script, so running the
        # conversion twice into the same folder duplicates the lines.
        with open(os.path.join(newdir, fp.split('.')[0] + '.txt'), 'a+') as f:
            f.writelines(lines)

print('ok')

之后就可以将所有xml转换为txt文件，然后使用命名软件将图片与标注文件统一进行重命名，我是都变成了纯数字的名字，然后通过老哥写的数据清洗的文件根据自己的要求稍微改了一下进行数据清洗以及划分成训练集与测试集（3：1），代码如下；

# -*- encoding: utf-8 -*-
"""
@File    : convert.py
@Time    : 2019/10/22 9:26
@Author  : Dontla
@Email   : sxana@qq.com
@Software: PyCharm
"""
import os
import re
import shutil

import cv2
import random


# 排序函数，对文件列表进行排序(filenames为文件夹文件的文件名的字符串列表，pattern为正则表达式，它是字符串类型)
def sort_filenames(filenames, pattern):
    """Sort ``filenames`` in place by the numbers that ``pattern`` captures.

    A plain string sort orders "10.txt" before "2.txt"; converting the
    captured text to numbers gives the natural 1, 2, 3, ... order.

    Args:
        filenames: List of file-name strings; reordered in place.
        pattern: Regular expression (as a string) whose capture group selects
            the numeric part of each name, e.g. ``'(.*?).txt'`` for purely
            numeric names or ``r'\\((.*?)\\)'`` for names like ``"x (12).txt"``.

    Raises:
        ValueError: If a captured piece of text is not a number.
    """
    def _to_number(text):
        # int()/float() replace eval(): they accept zero-padded names such as
        # "000012" (a SyntaxError under eval) and never execute the text.
        try:
            return int(text)
        except ValueError:
            return float(text)

    filenames.sort(key=lambda name: [_to_number(piece) for piece in re.findall(pattern, name)])


def extract_content(content_):
    """Extract every "class x y w h" record from the text of a label file.

    Args:
        content_: Complete text of a YOLO txt label file.

    Returns:
        A list of 5-tuples of strings, one per record found.
    """
    # Raw string with an escaped dot: the previous pattern's bare "." matched
    # any character, so a single field could swallow a separating space.
    number = r'(\d+\.?\d*)'
    return re.findall(' '.join([number] * 5), content_)


# 20200216:直接从文件按行读取
def extract_content_readlines(content):
    """Extract the "class x y w h" records of a label file line by line.

    Args:
        content: Iterable of lines, e.g. the result of ``readlines()``.

    Returns:
        A list with one entry per input line; each entry is the list of
        5-tuples of strings found on that line.
    """
    # Raw string with an escaped dot: the previous pattern's bare "." matched
    # any character, so a single field could swallow a separating space.
    number = r'(\d+\.?\d*)'
    record = re.compile(' '.join([number] * 5))

    content_extract = []
    for line in content:
        content_extract.append(record.findall(line.strip()))
        # Progress output kept from the original implementation.
        print(content_extract)
    return content_extract


# Cleaning/splitting driver: reads every YOLO txt label, converts the relative
# boxes to absolute pixel corners, drops invalid boxes, and writes the
# surviving images to train.txt / test.txt in the
# "path x1,y1,x2,y2,cls x1,y1,x2,y2,cls ..." line format.
if __name__ == '__main__':

    # Keep the trailing "/" on every directory below: file names are appended
    # by plain string concatenation.

    # Image directory as written into the output lists (the location the
    # training machine will read the images from).
    source_img_path_related_to_train_py = '/home/fire_exit/VOCdevkit/VOC2007/JPEGImag/'

    # Local locations of the images, the txt labels and the output lists.
    source_img_path = 'F:/VOCdevkit/VOC2007/JPEGImag/'
    source_txt_path = 'F:/VOCdevkit/VOC2007/txt/'
    target_txt_path = 'F:/VOCdevkit/VOC2007/qingxi/'

    # Receives a copy of every image left without a valid box after cleaning.
    null_img_dir = 'null_img'

    # Valid class ids are 0 .. num_classes - 1. The previous check
    # ``class_id > 8`` let id 8 through although only 8 classes exist.
    num_classes = 8

    # Probability with which an image is assigned to the training list.
    train_scale = 0.75

    def _parse_number(text):
        # Replaces eval() on text read from disk: no code execution, and
        # zero-padded integers are accepted.
        try:
            return int(text)
        except ValueError:
            return float(text)

    filenames = os.listdir(source_txt_path)

    # os.listdir order is not numeric; file names are pure numbers, so sort
    # them by the number in front of ".txt".
    pattern = '(.*?).txt'
    sort_filenames(filenames, pattern)

    train_lines = []
    test_lines = []

    for filename in filenames:

        with open(os.path.join(source_txt_path, filename), 'r', encoding='utf-8') as f:
            content = f.read()

        # List of ('cls', 'x', 'y', 'w', 'h') string tuples.
        content_extract = extract_content(content)

        stem = os.path.splitext(filename)[0]

        # The image is needed for its resolution, so any image size works.
        img_file = '{}{}.jpg'.format(source_img_path, stem)
        img = cv2.imread(img_file)
        if img is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or unreadable.
            print('cannot read image, skipped: {}'.format(img_file))
            continue

        img_width = img.shape[1]
        img_height = img.shape[0]

        path_str = source_img_path_related_to_train_py + stem + '.jpg'
        obj_strs = ''

        # Convert each record from relative centre/size to absolute corners.
        for obj_str in content_extract:
            object_evar = [_parse_number(field) for field in obj_str]

            class_id = object_evar[0]
            x, y = object_evar[1] * img_width, object_evar[2] * img_height
            w, h = object_evar[3] * img_width, object_evar[4] * img_height

            x_min, x_max = round(x - w / 2), round(x + w / 2)
            y_min, y_max = round(y - h / 2), round(y + h / 2)

            # Data cleaning: reject wrong class ids and boxes that leave the
            # image or have no area.
            if class_id >= num_classes \
                    or x_min < 0 \
                    or x_max > img_width \
                    or x_min >= x_max \
                    or y_min < 0 \
                    or y_max > img_height \
                    or y_min >= y_max:
                print('错误标注：')
                print(filename)
                print(object_evar)
                print('[{}, {}, {}, {}, {}]'.format(x_min, y_min, x_max, y_max, class_id))
                continue

            obj_strs += ' {},{},{},{},{}'.format(x_min, y_min, x_max, y_max, class_id)

        # Drawn for every readable image, before the empty check below.
        proba = random.random()

        # An image without any valid box is kept out of both lists; a copy is
        # stored for manual inspection.
        if obj_strs == '':
            print('空文件：{}'.format(filename))
            print('content：{}'.format(content))
            # The previous hard-coded 'null_img\\' prefix only addressed a
            # folder on Windows and the folder was never created.
            os.makedirs(null_img_dir, exist_ok=True)
            cv2.imwrite(os.path.join(null_img_dir, '{}.jpg'.format(stem)), img)
            print('将图片拷贝到“空文件”文件夹')
            continue

        write_strs = path_str + obj_strs
        print(write_strs)

        if proba < train_scale:
            train_lines.append(write_strs)
        else:
            test_lines.append(write_strs)

    # Write both lists; the context managers close the files on every path.
    os.makedirs(target_txt_path, exist_ok=True)
    with open(target_txt_path + 'train.txt', 'w', encoding='utf-8') as train_file:
        train_file.write('\n'.join(train_lines).strip())
    with open(target_txt_path + 'test.txt', 'w', encoding='utf-8') as test_file:
        test_file.write('\n'.join(test_lines).strip())

代码基本是Dontla老哥写的，老哥是真的强。

进行训练

首先将vocname文件改成自己的类别
然后将我们前面生成的train与test放入原本对应的dataset里的空txt里面
然后对config文件进行一些修改

__C.YOLO.CLASSES                = "./data/classes/voc.names"
__C.TRAIN.ANNOT_PATH            = "./data/dataset/voc_train.txt"
__C.TEST.ANNOT_PATH             = "./data/dataset/voc_test.txt"

训练分为两种
第一种是从头进行训练，耗费时间比较长：

$ python train.py
$ tensorboard --logdir ./data

第二种从coco预模型进行训练

$ cd checkpoint
$ wget https://github.com/YunYang1994/tensorflow-yolov3/releases/download/v1.0/yolov3_coco.tar.gz
$ tar -xvf yolov3_coco.tar.gz
$ cd ..
$ python convert_weight.py --train_from_coco
$ python train.py

训练差不多后进行评估模型

$ python evaluate.py
$ cd mAP
$ python main.py -na

区域框选

为了可以实现选择部分图像区域进行检测，增加了区域框选功能，主要是使用opencv的roi

import cv2
global img_cut_path
def image_cut(image_path):
    """Let the user select a rectangle on an image and return that region.

    The image is shown in a window named "roi" for the selection, then shown
    again with the selection outlined until a key is pressed.

    Args:
        image_path: Path of the image to load.

    Returns:
        The selected region as a slice of the loaded image. The slice is taken
        after the outline is drawn on that same image, so it may contain
        outline pixels.
    """
    frame = cv2.imread(image_path)
    left, top, width, height = cv2.selectROI(windowName="roi", img=frame, showCrosshair=True, fromCenter=False,)
    right, bottom = left + width, top + height

    cv2.rectangle(img=frame, pt1=(left, top), pt2=(right, bottom), color=(0, 0, 255), thickness=2)
    cropped = frame[top:bottom, left:right]

    cv2.imshow("roi", frame)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return cropped

结果保存为json文件

import cv2
import numpy as np
import core.utils as utils
import tensorflow as tf
from PIL import Image
import json
from collections import defaultdict, OrderedDict
def to_json(bboxes,file):
    """Write detection results to a JSON file.

    The output is one JSON object keyed by the detection's position in
    ``bboxes`` ("0", "1", ...); each value holds the class name, the four box
    coordinates, the score and the raw class id.

    Args:
        bboxes: Iterable of detections indexed as
            ``[xmin, ymin, xmax, ymax, score, class_id]``.
        file: Path of the JSON file to write.
    """
    classes = utils.read_class_names('./data/classes/coco.names')
    with open(file, 'w', encoding='utf-8') as out:
        video = {
            '{}'.format(position): {
                "物体名称": classes[int(det[5])],
                "xmin": det[0],
                "ymin": det[1],
                "xmax": det[2],
                "ymax": det[3],
                "score": det[4],
                "class_id": det[5],
            }
            for position, det in enumerate(bboxes)
        }
        out.write(json.dumps(video, ensure_ascii=False, indent=1) + "\n")
    return