VOC数据集与COCO数据集

最新推荐文章于 2024-07-07 14:55:39 发布

噢耶

最新推荐文章于 2024-07-07 14:55:39 发布

阅读量2.8k

点赞数 3

分类专栏：深度学习文章标签：深度学习

本文链接：https://blog.csdn.net/weixin_44764524/article/details/112345718

版权

深度学习专栏收录该内容

2 篇文章 0 订阅

订阅专栏

本文介绍了VOC数据集的结构，如何使用Python脚本将VOC转换为YOLO所需的txt格式，并展示了如何将COCO数据集从JSON转为VOC XML格式。同时，详细讲解了COCO数据集的处理方法和类别转换。

摘要由CSDN通过智能技术生成

说明：以下代码全部为完整的，但是其中路径不是一个项目，可根据自己情况修改，仅供参考！个人笔记，一起学习！！
VOC2007：中包含9963张标注过的图片，由train/val/test三部分组成，共标注出24,640个物体。 VOC2007的test数据label已经公布，之后的没有公布（只有图片，没有label）。

VOC2012：对于检测任务，VOC2012的trainval/test包含08-11年的所有对应图片。 trainval有11540张图片共27450个物体。对于分割任务， VOC2012的trainval包含07-11年的所有对应图片， test只包含08-11。trainval有 2913张图片共6929个物体。
这些物体一共分为20类：

Person: person
Animal: bird, cat, cow, dog, horse, sheep
Vehicle: aeroplane, bicycle, boat, bus, car, motorbike, train
Indoor: bottle, chair, dining table, potted plant, sofa, tv/monitor

voc数据集格式：

  data
    |-- Annotations
    		|-- all xml files #存放xml文件，与JPEGImages图片一一对应
    |-- JPEGImages
    		|-- all your samples #所有你的数据集图片
    |-- ImageSets
    		|-- 。。。。#存放数据集分成的txt文件，每一行包含图片的名称。

ImageSets文件夹下txt文件的生成：voc_annotation.py

import xml.etree.ElementTree as ET
from os import getcwd

sets = [('data', 'train'), ('data', 'val'), ('data', 'test')]
classes = ["jyz", "xcxj", "fzc", "nc", "jyz_gz", "fzc_gz"] #根据自己情况修改
def convert_annotation(year, image_id, list_file):
    in_file = open('./Annotations/%s.xml' % (image_id),encoding='utf-8')#注意代码与xml文件路径
    tree = ET.parse(in_file)
    root = tree.getroot()
    for obj in root.iter('object'):
        difficult = 0
        if obj.find('difficult') != None:
            difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), int(xmlbox.find('xmax').text),
             int(xmlbox.find('ymax').text))
        list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))
wd = getcwd()
for year, image_set in sets:
    image_ids = open('./ImageSets\%s.txt' % (image_set)).read().strip().split()#注意路径
    list_file = open('%s_%s.txt' % (year, image_set), 'w')
    for image_id in image_ids:
        list_file.write('%s\JPEGImages\%s.jpg' % (wd, image_id))
        convert_annotation(year, image_id, list_file)
        list_file.write('\n')
    list_file.close()

voc_label.py,运行后在data/label文件夹下面生成了对应的txt标签,并在data下生成三个txt文件。以下对应yolov4所需的txt文件格式：(Image_path xmin0,ymin0,xmax0,ymax0,class0 xmin1,ymin1,xmax1,ymax1,class1 …)。

# xml解析包
import xml.etree.ElementTree as ET
import pickle
import os
# os.listdir() 方法用于返回指定的文件夹包含的文件或文件夹的名字的列表
from os import listdir, getcwd
from os.path import join
sets = ['train', 'test', 'val']
classes = ['jyz', 'xcxj', 'fzc', 'nc','jyz_gz','fzc_gz']
# 进行归一化操作
def convert(size, box): # size:(原图w,原图h) , box:(xmin,xmax,ymin,ymax)
    dw = 1./size[0]     # 1/w
    dh = 1./size[1]     # 1/h
    x = (box[0] + box[1])/2.0   # 物体在图中的中心点x坐标
    y = (box[2] + box[3])/2.0   # 物体在图中的中心点y坐标
    w = box[1] - box[0]         # 物体实际像素宽度
    h = box[3] - box[2]         # 物体实际像素高度
    x = x*dw    # 物体中心点x的坐标比(相当于 x/原图w)
    w = w*dw    # 物体宽度的宽度比(相当于 w/原图w)
    y = y*dh    # 物体中心点y的坐标比(相当于 y/原图h)
    h = h*dh    # 物体宽度的宽度比(相当于 h/原图h)
    return (x, y, w, h)    # 返回 相对于原图的物体中心点的x坐标比,y坐标比,宽度比,高度比,取值范围[0-1]
# year ='2012', 对应图片的id（文件名）
def convert_annotation(image_id):
    '''
    将对应文件名的xml文件转化为label文件，xml文件包含了对应的bunding框以及图片长款大小等信息，
    通过对其解析，然后进行归一化最终读到label文件中去，也就是说
    一张图片文件对应一个xml文件，然后通过解析和归一化，能够将对应的信息保存到唯一一个label文件中去
    labal文件中的格式：calss x y w h　　同时，一张图片对应的类别有多个，所以对应的ｂｕｎｄｉｎｇ的信息也有多个
    '''
    # 对应的通过year 找到相应的文件夹，并且打开相应image_id的xml文件，其对应bund文件
    in_file = open('./Annotations/%s.xml' % (image_id), encoding='utf-8')
    # 准备在对应的image_id 中写入对应的label，分别为
    # <object-class> <x> <y> <width> <height>
    out_file = open('./labels/%s.txt' % (image_id), 'w', encoding='utf-8')
    # 解析xml文件
    tree = ET.parse(in_file)
    # 获得对应的键值对
    root = tree.getroot()
    # 获得图片的尺寸大小
    size = root.find('size')
    # 获得宽
    w = int(size.find('width').text)
    # 获得高
    h = int(size.find('height').text)
    # 遍历目标obj
    for obj in root.iter('object'):
        # 获得difficult ？？
        difficult = obj.find('difficult').text
        # 获得类别 =string 类型
        cls = obj.find('name').text
        # 如果类别不是对应在我们预定好的class文件中，或difficult==1则跳过
        if cls not in classes or int(difficult) == 1:
            continue
        # 通过类别名称找到id
        cls_id = classes.index(cls)
        # 找到bndbox 对象
        xmlbox = obj.find('bndbox')
        # 获取对应的bndbox的数组 = ['xmin','xmax','ymin','ymax']
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
             float(xmlbox.find('ymax').text))
        print(image_id, cls, b)
        # 带入进行归一化操作
        # w = 宽, h = 高， b= bndbox的数组 = ['xmin','xmax','ymin','ymax']
        bb = convert((w, h), b)
        # bb 对应的是归一化后的(x,y,w,h)
        # 生成 calss x y w h 在label文件中
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
# 返回当前工作目录
wd = getcwd()
print(wd)
for image_set in sets:
    '''
    对所有的文件数据集进行遍历
    做了两个工作：
　　　　１．讲所有图片文件都遍历一遍，并且将其所有的全路径都写在对应的txt文件中去，方便定位
　　　　２．同时对所有的图片文件进行解析和转化，将其对应的bundingbox 以及类别的信息全部解析写到label 文件中去
    　　　　　最后再通过直接读取文件，就能找到对应的label 信息
    '''
    # 先找labels文件夹如果不存在则创建
    if not os.path.exists('./labels/'):
        os.makedirs('./labels/')
    # 读取在ImageSets/Main 中的train、test..等文件的内容
    # 包含对应的文件名称
    image_ids = open('./ImageSets/%s.txt' % (image_set)).read().strip().split()
    # 打开对应的2012_train.txt 文件对其进行写入准备
    list_file = open('./%s.txt' % (image_set), 'w')
    # 将对应的文件_id以及全路径写进去并换行
    for image_id in image_ids:
        list_file.write('./images/%s.jpg\n' % (image_id))
        # 调用  year = 年份  image_id = 对应的文件名_id
        convert_annotation(image_id)
    # 关闭文件
    list_file.close()

COCO数据集是一个大型的、丰富的物体检测，分割和字幕数据集。这个数据集以scene understanding为目标，主要从复杂的日常场景中截取，图像中的目标通过精确的segmentation进行位置的标定。图像包括91类目标，328,000影像和2,500,000个label。目前为止有语义分割的最大数据集，提供的类别有80 类，有超过33 万张图片，其中20 万张有标注，整个数据集中个体的数目超过150 万个。
coco数据集格式：json文件

coco数据集格式转txt文件：coco.py

import sys

sys.path.append("..")
import xml.etree.ElementTree as ET
import config.yolov4_config as cfg
import os
from tqdm import tqdm


def parse_voc_annotation(
    data_path, file_type, anno_path, use_difficult_bbox=False
):

    classes = cfg.COCO_DATA["CLASSES"]
    img_inds_file = os.path.join(
        data_path, "ImageSets", file_type + ".txt"
    )
    with open(img_inds_file, "r") as f:
        lines = f.readlines()
        image_ids = [line.strip() for line in lines]

    with open(anno_path, "a") as f:
        for image_id in tqdm(image_ids):
            image_path = os.path.join(
                data_path, "JPEGImages", image_id + ".jpg"
            )
            annotation = image_path
            label_path = os.path.join(
                data_path, "Annotations", image_id + ".xml"
            )
            root = ET.parse(label_path).getroot()
            objects = root.findall("object")
            for obj in objects:
                difficult = obj.find("difficult").text.strip()
                if (not use_difficult_bbox) and (
                    int(difficult) == 1
                ):  # difficult 表示是否容易识别，0表示容易，1表示困难
                    continue
                bbox = obj.find("bndbox")
                class_id = classes.index(obj.find("name").text.lower().strip())
                xmin = bbox.find("xmin").text.strip()
                ymin = bbox.find("ymin").text.strip()
                xmax = bbox.find("xmax").text.strip()
                ymax = bbox.find("ymax").text.strip()
                annotation += " " + ",".join(
                    [xmin, ymin, xmax, ymax, str(class_id)]
                )
            annotation += "\n"
            if objects:
                f.write(annotation)

            # print(annotation)

    return len(image_ids)


if __name__ == "__main__":
    # train_set :  VOC2007_trainval 和 VOC2012_trainval
    train_data_path_2007 = os.path.join(
        cfg.DATA_PATH, "COCO2017_train", "VOCdevkit", "VOC2007"
    )
    train_data_path_2012 = os.path.join(
        cfg.DATA_PATH, "COCO2017_val", "VOCdevkit", "VOC2007"    #修改路径
    )
    train_annotation_path = os.path.join("../data", "train_annotation.txt") #修改文件名
    if os.path.exists(train_annotation_path):
        os.remove(train_annotation_path)

    len_train = parse_voc_annotation(
        train_data_path_2007,
        "trainval",
        train_annotation_path,
        use_difficult_bbox=False,
    ) + parse_voc_annotation(
        train_data_path_2012,
        "trainval",
        train_annotation_path,
        use_difficult_bbox=False,
    )

    print(
        "The number of images for train and test are :train : {0}".format(
            len_train
        )
    )

COCO数据集转VOC数据集：

from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw

# the path you want to save your results for coco to voc
savepath = "cocodata/xml/"
img_dir = savepath + "images/"
anno_dir = savepath + "Annotations/"
datasets_list = ["train2017", "val2017", "test2017"]

classes_names = [
   #自己添加类名
]
# Store annotations and train2014/val2014/... in this folder
dataDir = "E:\code\object_detection\yolov3-other-master\YOLOV3-master\data/" #修改自己数据集路径

headstr = """\
<annotation>
    <folder>VOC</folder>
    <filename>%s</filename>
    <source>
        <database>My Database</database>
        <annotation>COCO</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <owner>
        <flickrid>NULL</flickrid>
        <name>company</name>
    </owner>
    <size>
        <width>%d</width>
        <height>%d</height>
        <depth>%d</depth>
    </size>
    <segmented>0</segmented>
"""
objstr = """\
    <object>
        <name>%s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>%d</xmin>
            <ymin>%d</ymin>
            <xmax>%d</xmax>
            <ymax>%d</ymax>
        </bndbox>
    </object>
"""

tailstr = """\
</annotation>
"""


# if the dir is not exists,make it,else delete it
def mkr(path):
    if os.path.exists(path):
        shutil.rmtree(path)
        os.mkdir(path)
    else:
        os.mkdir(path)

mkr(img_dir)
mkr(anno_dir)

def id2name(coco):
    classes = dict()
    for cls in coco.dataset["categories"]:
        classes[cls["id"]] = cls["name"]
    return classes

def write_xml(anno_path, head, objs, tail):
    f = open(anno_path, "w")
    f.write(head)
    for obj in objs:
        f.write(objstr % (obj[0], obj[1], obj[2], obj[3], obj[4]))
    f.write(tail)


def save_annotations_and_imgs(coco, dataset, filename, objs):
    # eg:COCO_train2014_000000196610.jpg-->COCO_train2014_000000196610.xml
    anno_path = anno_dir + filename[:-3] + "xml"
    img_path = dataDir + dataset + "/" + filename
    # print(img_path)
    dst_imgpath = img_dir + filename

    img = cv2.imread(img_path)
    if img.shape[2] == 1:
        # print(filename + " not a RGB image")
        return
    # shutil.copy(img_path, dst_imgpath)

    head = headstr % (filename, img.shape[1], img.shape[0], img.shape[2])
    tail = tailstr
    write_xml(anno_path, head, objs, tail)


def showimg(coco, dataset, img, classes, cls_id, show=True):
    global dataDir
    I = Image.open("%s/%s/%s" % (dataDir, dataset, img["file_name"]))
    # 通过id，得到注释的信息
    annIds = coco.getAnnIds(imgIds=img["id"], catIds=cls_id, iscrowd=None)
    # print(annIds)
    anns = coco.loadAnns(annIds)
    # print(anns)
    # coco.showAnns(anns)
    objs = []
    for ann in anns:
        class_name = classes[ann["category_id"]]
        if class_name in classes_names:
            # print(class_name)
            if "bbox" in ann:
                bbox = ann["bbox"]
                xmin = int(bbox[0])
                ymin = int(bbox[1])
                if xmin <= 0:
                    xmin += 1
                if ymin <= 0:
                    ymin += 1
                xmax = int(bbox[2] + bbox[0])
                ymax = int(bbox[3] + bbox[1])
                obj = [class_name, xmin, ymin, xmax, ymax]
                objs.append(obj)
                draw = ImageDraw.Draw(I)
                draw.rectangle([xmin, ymin, xmax, ymax])
    if show:
        plt.figure()
        plt.axis("off")
        plt.imshow(I)
        plt.show()

    return objs


if __name__ == "__main__":
    i = 0
    for dataset in datasets_list:
        a = datasets_list

        # ./COCO/annotations/instances_train2014.json 修改自己路径
        annFile = "{}/annotations/instances_{}.json".format(dataDir, dataset)

        # COCO API for initializing annotated data
        coco = COCO(annFile)
        """
        COCO finished:
        loading annotations into memory...
        Done (t=0.81s)
        creating index...
        index created!
        end
        """
        # show all classes in coco
        classes = id2name(coco)
        # print(classes)
        # [1, 2, 3, 4, 6, 8]
        classes_ids = coco.getCatIds(catNms=classes_names)
        # print(classes_ids)
        for cls in classes_names:
            i += 1
            print(i)
            b = classes_names
            # Get ID number of this class
            cls_id = coco.getCatIds(catNms=[cls])
            img_ids = coco.getImgIds(catIds=cls_id)
            # print(cls, len(img_ids))
            # imgIds=img_ids[0:10]
            for imgId in tqdm(img_ids):
                c = img_ids
                img = coco.loadImgs(imgId)[0]
                filename = img["file_name"]
                # print(filename)
                objs = showimg(
                    coco, dataset, img, classes, classes_ids, show=False
                )
                # print(objs)
                save_annotations_and_imgs(coco, dataset, filename, objs)

VOC转COCO数据集：xml_to_json.py

# coding:utf-8
# 运行前请先做以下工作：
# pip install lxml
# 将所有的图片及xml文件存放到xml_dir指定的文件夹下，并将此文件夹放置到当前目录下

import os
import glob
import json
import shutil
import numpy as np
import xml.etree.ElementTree as ET

START_BOUNDING_BOX_ID = 1
save_path = "."

def get(root, name):
    return root.findall(name)


def get_and_check(root, name, length):
    vars = get(root, name)
    if len(vars) == 0:
        raise NotImplementedError('Can not find %s in %s.' % (name, root.tag))
    if length and len(vars) != length:
        raise NotImplementedError('The size of %s is supposed to be %d, but is %d.' % (name, length, len(vars)))
    if length == 1:
        vars = vars[0]
    return vars


def convert(xml_list, json_file):
    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
    categories = pre_define_categories.copy()
    bnd_id = START_BOUNDING_BOX_ID
    all_categories = {}
    for index, line in enumerate(xml_list):
        xml_f = line
        tree = ET.parse(xml_f)
        root = tree.getroot()

        filename = os.path.basename(xml_f)[:-4] + ".jpg"
        image_id = 20190000001 + index
        size = get_and_check(root, 'size', 1)
        width = int(get_and_check(size, 'width', 1).text)
        height = int(get_and_check(size, 'height', 1).text)
        image = {'file_name': filename, 'height': height, 'width': width, 'id': image_id}
        json_dict['images'].append(image)
        #  Currently we do not support segmentation
        segmented = get_and_check(root, 'segmented', 1).text
        assert segmented == '0'
        for obj in get(root, 'object'):
            category = get_and_check(obj, 'name', 1).text
            if category in all_categories:
                all_categories[category] += 1
            else:
                all_categories[category] = 1
            if category not in categories:
                if only_care_pre_define_categories:
                    continue
                new_id = len(categories) + 1
                print(
                    "[warning] category '{}' not in 'pre_define_categories'({}), create new id: {} automatically".format(
                        category, pre_define_categories, new_id))
                categories[category] = new_id
            category_id = categories[category]
            bndbox = get_and_check(obj, 'bndbox', 1)
            xmin = int(float(get_and_check(bndbox, 'xmin', 1).text))
            ymin = int(float(get_and_check(bndbox, 'ymin', 1).text))
            xmax = int(float(get_and_check(bndbox, 'xmax', 1).text))
            ymax = int(float(get_and_check(bndbox, 'ymax', 1).text))
            assert (xmax > xmin), "xmax <= xmin, {}".format(line)
            assert (ymax > ymin), "ymax <= ymin, {}".format(line)
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            ann = {'area': o_width * o_height, 'iscrowd': 0, 'image_id':
                image_id, 'bbox': [xmin, ymin, o_width, o_height],
                   'category_id': category_id, 'id': bnd_id, 'ignore': 0,
                   'segmentation': []}
            json_dict['annotations'].append(ann)
            bnd_id = bnd_id + 1

    for cate, cid in categories.items():
        cat = {'supercategory': 'food', 'id': cid, 'name': cate}
        json_dict['categories'].append(cat)
    json_fp = open(json_file, 'w')
    json_str = json.dumps(json_dict)
    json_fp.write(json_str)
    json_fp.close()
    print("------------create {} done--------------".format(json_file))
    print("find {} categories: {} -->>> your pre_define_categories {}: {}".format(len(all_categories),
                                                                                  all_categories.keys(),
                                                                                  len(pre_define_categories),
                                                                                  pre_define_categories.keys()))
    print("category: id --> {}".format(categories))
    print(categories.keys())
    print(categories.values())


if __name__ == '__main__':
    # 定义你自己的类别
    classes = ['RBC']
    pre_define_categories = {}
    for i, cls in enumerate(classes):
        pre_define_categories[cls] = i + 1
    # 这里也可以自定义类别id，把上面的注释掉换成下面这行即可
    # pre_define_categories = {'a1': 1, 'a3': 2, 'a6': 3, 'a9': 4, "a10": 5}
    only_care_pre_define_categories = True  # or False

    # 保存的json文件
    save_json_train = 'train.json'
    save_json_val = 'val.json'
    save_json_test = 'test.json'

    # 初始文件所在的路径
    xml_dir = "./Annotations1"
    xml_list = glob.glob(xml_dir + "/*.xml")
    xml_list = np.sort(xml_list)

    # 打乱数据集
    np.random.seed(100)
    np.random.shuffle(xml_list)

    # 按比例划分打乱后的数据集
    train_ratio = 0.8
    val_ratio = 0.1
    train_num = int(len(xml_list) * train_ratio)
    val_num = int(len(xml_list) * val_ratio)
    xml_list_train = xml_list[:train_num]
    xml_list_val = xml_list[train_num: train_num+val_num]
    xml_list_test = xml_list[train_num+val_num:]

    # 将xml文件转为coco文件，在指定目录下生成三个json文件（train/test/food）
    convert(xml_list_train, save_json_train)
    convert(xml_list_val, save_json_val)
    convert(xml_list_test, save_json_test)

噢耶

关注

3
点赞
踩
23

收藏

觉得还不错? 一键收藏
打赏
2
评论
VOC数据集与COCO数据集

VOC2007：中包含9963张标注过的图片，由train/val/test三部分组成，共标注出24,640个物体。 VOC2007的test数据label已经公布，之后的没有公布（只有图片，没有label）。VOC2012：对于检测任务，VOC2012的trainval/test包含08-11年的所有对应图片。 trainval有11540张图片共27450个物体。对于分割任务， VOC2012的trainval包含07-11年的所有对应图片， test只包含08-11。trainval有 29
复制链接

扫一扫