yolov3_v1（数据处理）

最新推荐文章于 2022-12-21 13:59:54 发布

尼古拉斯·two_dog

最新推荐文章于 2022-12-21 13:59:54 发布

阅读量634

点赞数

分类专栏：深度学习文章标签： yolov3 数据预处理

本文链接：https://blog.csdn.net/gm_ergou/article/details/118570318

版权

深度学习专栏收录该内容

28 篇文章 4 订阅

订阅专栏

参考链接：

https://segmentfault.com/a/1190000021794637?utm_source=tag-newest

https://blog.csdn.net/qq_38232598/article/details/88695454

代码：https://github.com/YunYang1994/tensorflow-yolov3

1.数据标注

2.xml格式数据处理

2.1 按比例划分数据集

2.2 根据划分好的数据集，生成yolo训练所需的格式

2.3 查看yolo格式的数据是否正确，show画框

数据处理

1.数据标注

安装labelIme：pip install labelImg 或者 pip install labelme

在控制台输入labelImg 或者 labelme，调出标注工具

使用labelImg工具标注，格式选择VOC(生成xml格式)

使用labelme工具标注(生成json格式)

2.xml格式数据处理

2.1 按比例划分数据集

#----------------------------------------------------------------------#
#   验证集的划分在train.py代码里面进行
#   test.txt和val.txt里面没有内容是正常的。训练不会使用到。
#----------------------------------------------------------------------#
import os
import random 
random.seed(0)

xmlfilepath=r'../data/img/xml'
saveBasePath=r"../data/img/xml/"
 
#----------------------------------------------------------------------#
#   想要增加测试集修改trainval_percent
#   train_percent不需要修改
#----------------------------------------------------------------------#
train_percent=0.8

temp_xml = os.listdir(xmlfilepath)
total_xml = []
for xml in temp_xml:
    if xml.endswith(".xml"):
        total_xml.append(xml)

num=len(total_xml)  
list=range(num)  
tv=int(num*train_percent)   
trainval= random.sample(list,tv)  
 
print("train size",tv)
print("test size",num-tv)

ftest = open(os.path.join(saveBasePath,'test.txt'), 'w')  
ftrain = open(os.path.join(saveBasePath,'train.txt'), 'w')  
 
for i  in list:  
    name=total_xml[i][:-4]+'\n'  
    if i in trainval:  
        ftrain.write(name)
    else:  
        ftest.write(name)  
   
ftrain.close()  
ftest .close()

2.2 生成yolo训练所需的格式

import os
import argparse
import xml.etree.ElementTree as ET

def convert_voc_annotation(data_path, data_type, anno_path, use_difficult_bbox=True):

    classes = ['p', 'r']
    img_inds_file = os.path.join(data_path, data_type + '.txt')
    with open(img_inds_file, 'r') as f:
        txt = f.readlines()
        image_inds = [line.strip() for line in txt]
    
    with open(anno_path, 'a') as f:
        for image_ind in image_inds:
            image_path = os.path.join(data_path,  image_ind + '.jpg')
            annotation = image_path
            label_path = os.path.join(data_path,  image_ind + '.xml')
            root = ET.parse(label_path).getroot()
            objects = root.findall('object')
            for obj in objects:
                difficult = obj.find('difficult').text.strip()
                if (not use_difficult_bbox) and(int(difficult) == 1):
                    continue
                bbox = obj.find('bndbox')
                class_ind = classes.index(obj.find('name').text.lower().strip())
                xmin = bbox.find('xmin').text.strip()
                xmax = bbox.find('xmax').text.strip()
                ymin = bbox.find('ymin').text.strip()
                ymax = bbox.find('ymax').text.strip()
                annotation += ' ' + ','.join([xmin, ymin, xmax, ymax, str(class_ind)])
            print(annotation)
            f.write(annotation + "\n")
    return len(image_inds)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_path", default="../data/img/xml/")
    parser.add_argument("--train_annotation", default="../data/img/train.txt")
    parser.add_argument("--test_annotation",  default="../data/img/test.txt")
    flags = parser.parse_args()

    if os.path.exists(flags.train_annotation):os.remove(flags.train_annotation)
    if os.path.exists(flags.test_annotation):os.remove(flags.test_annotation)

    num1 = convert_voc_annotation(flags.data_path, 'train', flags.train_annotation, False)
    num2 = convert_voc_annotation(flags.data_path, 'test', flags.test_annotation, False)

2.3 查看yolo格式的数据是否正确，画框

#! /usr/bin/env python
# coding=utf-8
#================================================================
#   Copyright (C) 2019 * Ltd. All rights reserved.
#
#   Editor      : VIM
#   File name   : show_bboxes.py
#   Author      : YunYang1994
#   Created date: 2019-05-29 01:18:24
#   Description :
#
#================================================================

import cv2
import numpy as np
from PIL import Image

ID = 1
label_txt = "../data/img/test.txt"
image_info = open(label_txt).readlines()[ID].split()

image_path = image_info[0]
image = cv2.imread(image_path)
for bbox in image_info[1:]:
    bbox = bbox.split(",")
    image = cv2.rectangle(image,(int(float(bbox[0])),
                                 int(float(bbox[1]))),
                                (int(float(bbox[2])),
                                 int(float(bbox[3]))), (255,0,0), 2)

image = Image.fromarray(np.uint8(image))
image.show()

2.4 k-mean计算anchors

方法1 ：效果一般

import glob
import random
import xml.etree.ElementTree as ET

import numpy as np


def cas_iou(box,cluster):
    x = np.minimum(cluster[:,0],box[0])
    y = np.minimum(cluster[:,1],box[1])

    intersection = x * y
    area1 = box[0] * box[1]

    area2 = cluster[:,0] * cluster[:,1]
    iou = intersection / (area1 + area2 -intersection)

    return iou

def avg_iou(box,cluster):
    return np.mean([np.max(cas_iou(box[i],cluster)) for i in range(box.shape[0])])


def kmeans(box,k):
    # 取出一共有多少框
    row = box.shape[0]
    
    # 每个框各个点的位置
    distance = np.empty((row,k))
    
    # 最后的聚类位置
    last_clu = np.zeros((row,))

    np.random.seed()

    # 随机选5个当聚类中心
    cluster = box[np.random.choice(row,k,replace = False)]
    # cluster = random.sample(row, k)
    while True:
        # 计算每一行距离五个点的iou情况。
        for i in range(row):
            distance[i] = 1 - cas_iou(box[i],cluster)
        
        # 取出最小点
        near = np.argmin(distance,axis=1)

        if (last_clu == near).all():
            break
        
        # 求每一个类的中位点
        for j in range(k):
            cluster[j] = np.median(
                box[near == j],axis=0)

        last_clu = near

    return cluster

def load_data(path):
    data = []
    # 对于每一个xml都寻找box
    for xml_file in glob.glob('{}/*xml'.format(path)):
        tree = ET.parse(xml_file)
        height = int(tree.findtext('./size/height'))
        width = int(tree.findtext('./size/width'))
        if height<=0 or width<=0:
            continue
        
        # 对于每一个目标都获得它的宽高
        for obj in tree.iter('object'):
            xmin = int(float(obj.findtext('bndbox/xmin'))) / width
            ymin = int(float(obj.findtext('bndbox/ymin'))) / height
            xmax = int(float(obj.findtext('bndbox/xmax'))) / width
            ymax = int(float(obj.findtext('bndbox/ymax'))) / height

            xmin = np.float64(xmin)
            ymin = np.float64(ymin)
            xmax = np.float64(xmax)
            ymax = np.float64(ymax)
            # 得到宽高
            data.append([xmax-xmin,ymax-ymin])
    return np.array(data)


if __name__ == '__main__':
    # 运行该程序会计算'./VOCdevkit/VOC2007/Annotations'的xml
    # 会生成yolo_anchors.txt
    SIZE = 416
    anchors_num = 9
    # 载入数据集，可以使用VOC的xml
    path = r'../data/img/xml'
    
    # 载入所有的xml
    # 存储格式为转化为比例后的width,height
    data = load_data(path)
    
    # 使用k聚类算法
    out = kmeans(data,anchors_num)
    out = out[np.argsort(out[:,0])]
    print('acc:{:.2f}%'.format(avg_iou(data,out) * 100))
    print(out*SIZE)
    data = out*SIZE
    f = open("../data/img/yolo_anchors.txt", 'w')
    row = np.shape(data)[0]
    for i in range(row):
        if i == 0:
            x_y = "%d,%d" % (data[i][0], data[i][1])
        else:
            x_y = ", %d,%d" % (data[i][0], data[i][1])
        f.write(x_y)
    f.close()

方法2：效果较好

# coding=utf-8
from __future__ import division, print_function

import numpy as np

def iou(box, clusters):
    """
    Calculates the Intersection over Union (IoU) between a box and k clusters.
    param:
        box: tuple or array, shifted to the origin (i. e. width and height)
        clusters: numpy array of shape (k, 2) where k is the number of clusters
    return:
        numpy array of shape (k, 0) where k is the number of clusters
    """
    x = np.minimum(clusters[:, 0], box[0])
    y = np.minimum(clusters[:, 1], box[1])
    # if np.count_nonzero(x == 0) > 0 or np.count_nonzero(y == 0) > 0:
    #     raise ValueError("Box has no area")

    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]

    iou_ = np.true_divide(intersection, box_area + cluster_area - intersection + 1e-10)
    # iou_ = intersection / (box_area + cluster_area - intersection + 1e-10)

    return iou_


def avg_iou(boxes, clusters):
    """
    Calculates the average Intersection over Union (IoU) between a numpy array of boxes and k clusters.
    param:
        boxes: numpy array of shape (r, 2), where r is the number of rows
        clusters: numpy array of shape (k, 2) where k is the number of clusters
    return:
        average IoU as a single float
    """
    return np.mean([np.max(iou(boxes[i], clusters)) for i in range(boxes.shape[0])])


def translate_boxes(boxes):
    """
    Translates all the boxes to the origin.
    param:
        boxes: numpy array of shape (r, 4)
    return:
    numpy array of shape (r, 2)
    """
    new_boxes = boxes.copy()
    for row in range(new_boxes.shape[0]):
        new_boxes[row][2] = np.abs(new_boxes[row][2] - new_boxes[row][0])
        new_boxes[row][3] = np.abs(new_boxes[row][3] - new_boxes[row][1])
    return np.delete(new_boxes, [0, 1], axis=1)


def kmeans(boxes, k, dist=np.median):
    """
    Calculates k-means clustering with the Intersection over Union (IoU) metric.
    param:
        boxes: numpy array of shape (r, 2), where r is the number of rows
        k: number of clusters
        dist: distance function
    return:
        numpy array of shape (k, 2)
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    last_clusters = np.zeros((rows,))

    np.random.seed()

    # the Forgy method will fail if the whole array contains the same rows
    clusters = boxes[np.random.choice(rows, k, replace=False)]

    while True:
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        if (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            clusters[cluster] = dist(boxes[nearest_clusters == cluster], axis=0)

        last_clusters = nearest_clusters

    return clusters


def parse_anno(annotation_path, target_size=None):
    anno = open(annotation_path, 'r')
    result = []
    for line in anno:
        s = line.strip().split(' ')
        s = s[1:]
        box_cnt = len(s)
        for i in range(box_cnt):
            point=s[i].split(',')
            x_min, y_min, x_max, y_max = float(point[0]), float(point[1]), float(point[2]), float(point[3])
            width = x_max - x_min
            height = y_max - y_min

            # if width <=0:
            #     print(line,x_max, x_min)
            # assert width > 0
            # assert height > 0
            
            # use letterbox resize, i.e. keep the original aspect ratio
            # get k-means anchors on the resized target image size
            img_w=1920
            img_h=1080
            if target_size is not None:
                resize_ratio = min(target_size[0] / img_w, target_size[1] / img_h)
                width *= resize_ratio
                height *= resize_ratio
                result.append([width, height])
            # get k-means anchors on the original image size
            else:
                result.append([width, height])

    result = np.asarray(result)
    return result


def get_kmeans(anno, cluster_num=9):

    anchors = kmeans(anno, cluster_num)
    ave_iou = avg_iou(anno, anchors)

    anchors = anchors.astype('int').tolist()

    anchors = sorted(anchors, key=lambda x: x[0] * x[1])

    return anchors, ave_iou


if __name__ == '__main__':
    # target resize format: [width, height]
    # if target_resize is speficied, the anchors are on the resized image scale
    # if target_resize is set to None, the anchors are on the original image scale
    target_size = [416, 416]
    annotation_path = "data2/train.txt"
    anno_result = parse_anno(annotation_path, target_size=target_size)
    anchors, ave_iou = get_kmeans(anno_result, 9)

    anchor_string = ''
    for anchor in anchors:
        anchor_string += '{},{}, '.format(anchor[0], anchor[1])
    anchor_string = anchor_string[:-2]

    print('anchors are:')
    print(anchor_string)
    print('the average iou is:')
    print(ave_iou)

2.5 编写标签文件

新建个文件，根据生成yolo文件的代码中的标签，按顺序写进去

3.json格式数据处理

3.1 json格式转xml格式

# -*- coding: utf-8 -*-
"""
Created on Sun May 31 10:19:23 2020
@author: ywx
"""
import os
from typing import List, Any
import numpy as np
import codecs
import json
from glob import glob
import cv2
import shutil
from sklearn.model_selection import train_test_split
 
# 1.标签路径
labelme_path = "../data/img/json/"
#原始labelme标注数据路径
saved_path = "../data/img/json2/"
# 保存路径
isUseTest=True#是否创建test集

# 2.创建要求文件夹
if not os.path.exists(saved_path + "Annotations"):
    os.makedirs(saved_path + "Annotations")
if not os.path.exists(saved_path + "JPEGImages/"):
    os.makedirs(saved_path + "JPEGImages/")
if not os.path.exists(saved_path + "ImageSets/"):
    os.makedirs(saved_path + "ImageSets/")

# 3.获取待处理文件
files = glob(labelme_path + "*.json")
files = [i.replace("\\","/").split("/")[-1].split(".json")[0] for i in files]
print(files)

# 4.读取标注信息并写入 xml
for json_file_ in files:
    json_filename = labelme_path + json_file_ + ".json"
    json_file = json.load(open(json_filename, "r", encoding="utf-8"))
    height, width, channels = cv2.imread(labelme_path + json_file_ + ".jpg").shape
    with codecs.open(saved_path + "Annotations/" + json_file_ + ".xml", "w", "utf-8") as xml:
 
        xml.write('<annotation>\n')
        xml.write('\t<folder>' + 'WH_data' + '</folder>\n')
        xml.write('\t<filename>' + json_file_ + ".jpg" + '</filename>\n')
        xml.write('\t<source>\n')
        xml.write('\t\t<database>WH Data</database>\n')
        xml.write('\t\t<annotation>WH</annotation>\n')
        xml.write('\t\t<image>flickr</image>\n')
        xml.write('\t\t<flickrid>NULL</flickrid>\n')
        xml.write('\t</source>\n')
        xml.write('\t<owner>\n')
        xml.write('\t\t<flickrid>NULL</flickrid>\n')
        xml.write('\t\t<name>WH</name>\n')
        xml.write('\t</owner>\n')
        xml.write('\t<size>\n')
        xml.write('\t\t<width>' + str(width) + '</width>\n')
        xml.write('\t\t<height>' + str(height) + '</height>\n')
        xml.write('\t\t<depth>' + str(channels) + '</depth>\n')
        xml.write('\t</size>\n')
        xml.write('\t\t<segmented>0</segmented>\n')
        for multi in json_file["shapes"]:
            points = np.array(multi["points"])
            labelName=multi["label"]
            xmin = min(points[:, 0])
            xmax = max(points[:, 0])
            ymin = min(points[:, 1])
            ymax = max(points[:, 1])
            label = multi["label"]
            if xmax <= xmin:
                pass
            elif ymax <= ymin:
                pass
            else:
                xml.write('\t<object>\n')
                xml.write('\t\t<name>' + labelName+ '</name>\n')
                xml.write('\t\t<pose>Unspecified</pose>\n')
                xml.write('\t\t<truncated>1</truncated>\n')
                xml.write('\t\t<difficult>0</difficult>\n')
                xml.write('\t\t<bndbox>\n')
                xml.write('\t\t\t<xmin>' + str(int(xmin)) + '</xmin>\n')
                xml.write('\t\t\t<ymin>' + str(int(ymin)) + '</ymin>\n')
                xml.write('\t\t\t<xmax>' + str(int(xmax)) + '</xmax>\n')
                xml.write('\t\t\t<ymax>' + str(int(ymax)) + '</ymax>\n')
                xml.write('\t\t</bndbox>\n')
                xml.write('\t</object>\n')
                print(json_filename, xmin, ymin, xmax, ymax, label)
        xml.write('</annotation>')

# 5.复制图片到 VOC2007/JPEGImages/下
image_files = glob(labelme_path + "*.jpg")
print("copy image files to VOC007/JPEGImages/")
for image in image_files:
    shutil.copy(image, saved_path + "JPEGImages/")

# 6.split files for txt
txtsavepath = saved_path + "ImageSets/"
ftrainval = open(txtsavepath + '/trainval.txt', 'w')
ftest = open(txtsavepath + '/test.txt', 'w')
ftrain = open(txtsavepath + '/train.txt', 'w')
fval = open(txtsavepath + '/val.txt', 'w')
total_files = glob(saved_path + "Annotations/*.xml")
total_files = [i.replace("\\","/").split("/")[-1].split(".xml")[0] for i in total_files]
trainval_files=[]
test_files=[] 
if isUseTest:
    trainval_files, test_files = train_test_split(total_files, test_size=0.2, random_state=55) 
else: 
    trainval_files=total_files
for file in trainval_files: 
    ftrainval.write(file + "\n")

# split 
train_files, val_files = train_test_split(trainval_files, test_size=0.2, random_state=55) 
# train
for file in train_files: 
    ftrain.write(file + "\n") 
# val 
for file in val_files: 
    fval.write(file + "\n")
for file in test_files:
    print(file)
    ftest.write(file + "\n")
ftrainval.close()
ftrain.close()
fval.close()
ftest.close()

3.2 xml格式转yolo格式

json转成xml格式之后，即可按照xml转yolo的步骤进行，具体方法如上一节所示。

xml格式转yolo格式：

import os
import argparse
import xml.etree.ElementTree as ET

def convert_voc_annotation(data_path, data_type, anno_path, use_difficult_bbox=True):

    classes = ['p', 'r']
    img_inds_file = os.path.join(data_path,'ImageSets', data_type + '.txt')
    with open(img_inds_file, 'r') as f:
        txt = f.readlines()
        image_inds = [line.strip() for line in txt]
    
    with open(anno_path, 'a') as f:
        for image_ind in image_inds:
            image_path = os.path.join(data_path, 'JPEGImages', image_ind + '.jpg')
            annotation = image_path
            label_path = os.path.join(data_path, 'Annotations', image_ind + '.xml')
            root = ET.parse(label_path).getroot()
            objects = root.findall('object')
            for obj in objects:
                difficult = obj.find('difficult').text.strip()
                if (not use_difficult_bbox) and(int(difficult) == 1):
                    continue
                bbox = obj.find('bndbox')
                class_ind = classes.index(obj.find('name').text.lower().strip())
                xmin = bbox.find('xmin').text.strip()
                xmax = bbox.find('xmax').text.strip()
                ymin = bbox.find('ymin').text.strip()
                ymax = bbox.find('ymax').text.strip()
                annotation += ' ' + ','.join([xmin, ymin, xmax, ymax, str(class_ind)])
            print(annotation)
            f.write(annotation + "\n")
    return len(image_inds)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_path", default="../data/img/json2/")
    parser.add_argument("--train_annotation", default="../data/img/json2/train.txt")
    parser.add_argument("--test_annotation",  default="../data/img/json2/test.txt")
    flags = parser.parse_args()

    if os.path.exists(flags.train_annotation):os.remove(flags.train_annotation)
    if os.path.exists(flags.test_annotation):os.remove(flags.test_annotation)

    num1 = convert_voc_annotation(flags.data_path, 'train', flags.train_annotation, False)
    num2 = convert_voc_annotation(flags.data_path, 'test', flags.test_annotation, False)

4.数据提供

总共十张图片，可以同时做训练+测试，进行过拟合训练测试模型。

train.txt test.txt

data/img/xml/10514.jpg 864,212,927,353,1 977,214,1047,346,1 718,200,744,240,1 756,197,788,249,1 785,194,819,227,1 556,222,592,278,1 503,226,556,283,1 364,269,419,328,1 312,277,376,339,1 219,286,273,355,1
data/img/xml/10725.jpg 935,276,1013,424,1 829,291,899,424,1 1078,273,1117,346,1 1129,267,1178,337,1
data/img/xml/11830.jpg 1,276,118,624,1 190,127,379,381,1 594,18,668,154,1 658,8,719,124,1
data/img/xml/14023.jpg 835,87,879,194,1 927,82,973,194,1
data/img/xml/1775.jpg 840,58,895,180,0 160,183,199,218,1 186,178,226,212,1 215,168,265,208,1 429,127,479,167,1 470,118,528,159,1 573,99,633,146,1 1145,60,1179,97,1 1220,64,1263,103,1 1399,80,1433,130,1 1436,81,1470,135,1 1467,82,1508,131,1 1554,118,1605,163,1 1520,112,1567,158,1 1647,123,1686,174,1 1681,128,1732,183,1
data/img/xml/2001.jpg 1,487,279,1078,1 1267,106,1296,141,1 1288,113,1323,151,1 1332,117,1369,156,1 1373,121,1412,163,1 1618,158,1656,201,1 1586,146,1620,190,1 1690,162,1729,219,1 383,115,422,139,1
data/img/xml/357.jpg 759,5,828,171,0 826,1,928,185,0 1529,65,1626,210,1 1633,82,1745,228,1 1429,27,1517,155,1 1245,1,1288,51,1 1283,1,1340,73,1
data/img/xml/7985.jpg 867,276,897,339,1 913,269,946,331,1
data/img/xml/5683.jpg 286,277,358,344,1 345,262,400,330,1 382,254,431,319,1 412,250,469,312,1 654,264,723,331,1 790,259,862,332,1 828,249,904,319,1 926,226,978,277,1
data/img/xml/7015.jpg 935,82,965,119,1 977,83,1013,115,1 1018,82,1054,119,1 1422,118,1451,154,1 1477,127,1504,156,1

anchors.txt

7,6, 6,7, 7,8, 8,7, 8,10, 7,12, 11,9, 11,14, 17,31

new.names

p
r

测试照片：