Ubuntu18.04+darknet+yolov3将coco数据集转化成VOC数据集格式并训练，及训练过错中无法加载图片的问题个人解决办法（个人记录）

本文链接：https://blog.csdn.net/lishqia/article/details/109215864

由于研究生课题需要，需要将coco数据集转化成VOC数据集格式，并加入自己做的数据集，用于训练，现将过程记录如下。

1、提取特定图片类别，同时将json转xml

cooc数据集有80类物体，有些类别本人并不需要，因此需要将需要的物体图片提取出来。同时coco数据集的标注文件为json格式的，voc数据集的标注文件是xml格式的，因此需要进行格式转换。代码如下：（此处代码来源于博客，感谢大佬分享：https://blog.csdn.net/weixin_42224823/article/details/106282114）
需要修改的地方有4处：
savepath
dataset_List
classes_names
dataDir

from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw

#the path you want to save your results for coco to voc
savepath="/home/lsq/coco/coco_class/"
img_dir=savepath+'images/train2017/'
anno_dir=savepath+'Annotations/train2017/'
# datasets_list=['train2017', 'val2017']
# datasets_list=['train2017']
datasets_list=['train2017']
classes_names = ["person","backpack","handbag","suitcase","bottle","wine glass","cup","fork","knife""spoon","bowl","banana","apple","sandwich","orange","chair","sofa","bed","diningtable","mouse","remote","keyboard","cell phone","book","toothbrush"] 


#Store annotations and train2014/val2014/... in this folder
dataDir= '/home/lsq/coco/'  

headstr = """\
<annotation>
    <folder>VOC</folder>
    <filename>%s</filename>
    <source>
        <database>My Database</database>
        <annotation>COCO</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <size>
        <width>%d</width>
        <height>%d</height>
        <depth>%d</depth>
    </size>
    <segmented>0</segmented>
"""
objstr = """\
    <object>
        <name>%s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>%d</xmin>
            <ymin>%d</ymin>
            <xmax>%d</xmax>
            <ymax>%d</ymax>
        </bndbox>
    </object>
"""

tailstr = '''\
</annotation>
'''

#if the dir is not exists,make it,else delete it
def mkr(path):
    if os.path.exists(path):
        shutil.rmtree(path)
        os.mkdir(path)
    else:
        os.mkdir(path)
mkr(img_dir)
mkr(anno_dir)
def id2name(coco):
    classes=dict()
    for cls in coco.dataset['categories']:
        classes[cls['id']]=cls['name']
    return classes

def write_xml(anno_path,head, objs, tail):
    f = open(anno_path, "w")
    f.write(head)
    for obj in objs:
        f.write(objstr%(obj[0],obj[1],obj[2],obj[3],obj[4]))
    f.write(tail)


def save_annotations_and_imgs(coco,dataset,filename,objs):
    #eg:COCO_train2014_000000196610.jpg-->COCO_train2014_000000196610.xml
    anno_path=anno_dir+filename[:-3]+'xml'
    img_path=dataDir+'images/'+dataset+'/'+filename
    # print(img_path)
    dst_imgpath=img_dir+filename
    print(img_path,'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')

    img=cv2.imread(img_path)
    # print(img)
    if (img.shape[2] == 1):
        print(filename + " not a RGB image")
        return

    shutil.copy(img_path, dst_imgpath)

    head=headstr % (filename, img.shape[1], img.shape[0], img.shape[2])
    tail = tailstr
    write_xml(anno_path,head, objs, tail)


def showimg(coco,dataset,img,classes,cls_id,show=True):
    global dataDir
    I=Image.open('%s/%s/%s/%s'%(dataDir,'images',dataset,img['file_name']))
    #Get the annotated information by ID
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None)
    # print(annIds)
    anns = coco.loadAnns(annIds)
    # print(anns)
    # coco.showAnns(anns)
    objs = []
    for ann in anns:
        class_name=classes[ann['category_id']]
        if class_name in classes_names:
            print(class_name)
            if 'bbox' in ann:
                bbox=ann['bbox']
                xmin = int(bbox[0])
                ymin = int(bbox[1])
                xmax = int(bbox[2] + bbox[0])
                ymax = int(bbox[3] + bbox[1])
                obj = [class_name, xmin, ymin, xmax, ymax]
                objs.append(obj)
                draw = ImageDraw.Draw(I)
                draw.rectangle([xmin, ymin, xmax, ymax])
    if show:
        plt.figure()
        plt.axis('off')
        plt.imshow(I)
        plt.show()

    return objs

for dataset in datasets_list:
    #./COCO/annotations/instances_train2014.json
    annFile='{}/annotations_1/instances_{}.json'.format(dataDir,dataset)

    #COCO API for initializing annotated data
    coco = COCO(annFile)
    '''
    When the COCO object is created, the following information will be output:
    loading annotations into memory...
    Done (t=0.81s)
    creating index...
    index created!
    So far, the JSON script has been parsed and the images are associated with the corresponding annotated data.
    '''
    #show all classes in coco
    classes = id2name(coco)
    print(classes)
    #[1, 2, 3, 4, 6, 8]
    classes_ids = coco.getCatIds(catNms=classes_names)
    print(classes_ids)
    # exit()
    for cls in classes_names:
        #Get ID number of this class
        cls_id=coco.getCatIds(catNms=[cls])
        img_ids=coco.getImgIds(catIds=cls_id)
        print(cls,len(img_ids))
        # imgIds=img_ids[0:10]
        for imgId in tqdm(img_ids):
            img = coco.loadImgs(imgId)[0]
            filename = img['file_name']
            # print(filename)
            objs=showimg(coco, dataset, img, classes,classes_ids,show=False)
            print(objs)
            save_annotations_and_imgs(coco, dataset, filename, objs)

如果有同学需要将整个coco数据集所有类别图片的xml文件全部提取出，代码如下：
（代码来源于博客，感谢大佬分享：https://blog.csdn.net/qq_38806886/article/details/103875334）

from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw
savepath="/home/lsq/darknet/coco/VOC/"
datasets_list=['val2017']    
img_dir=savepath+'images/'         
anno_dir=savepath+'annotations/'      
classes_names =['person','bicycle', 'car','motorcycle','airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog','horse', 'sheep','cow','elephant','bear', 'zebra', 'giraffe','backpack','umbrella', 'handbag','tie', 'suitcase', 'frisbee', 'skis', 'snowboard','sports ball', 'kite', 'baseball bat', 'baseball glove','skateboard', 'surfboard', 'tennis racket','bottle', 'wine glass', 'cup', 'fork','knife', 'spoon', 'bowl', 'banana','apple', 'sandwich', 'orange','broccoli', 'carrot', 'hot dog', 'pizza','donut', 'cake', 'chair', 'couch', 'potted plant', 'bed','dining table', 'toilet','tv','laptop', 'mouse','remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
dataDir= '/home/lsq/darknet/coco' 
headstr = """\
<annotation>
    <folder>VOC</folder>
    <filename>%s</filename>
    <source>
        <database>My Database</database>
        <annotation>COCO</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <owner>
        <flickrid>NULL</flickrid>
        <name>company</name>
    </owner>
    <size>
        <width>%d</width>
        <height>%d</height>
        <depth>%d</depth>
    </size>
    <segmented>0</segmented>
"""
objstr = """\
    <object>
        <name>%s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>%d</xmin>
            <ymin>%d</ymin>
            <xmax>%d</xmax>
            <ymax>%d</ymax>
        </bndbox>
    </object>
"""
 
tailstr = '''\
</annotation>
'''
def mkr(path):
    if os.path.exists(path):
        shutil.rmtree(path)
        os.mkdir(path)
    else:
        os.mkdir(path)
mkr(img_dir)
mkr(anno_dir)
def id2name(coco):
    classes=dict()
    for cls in coco.dataset['categories']:
        classes[cls['id']]=cls['name']
    return classes
 
def write_xml(anno_path,head, objs, tail):
    f = open(anno_path, "w")
    f.write(head)
    for obj in objs:
        f.write(objstr%(obj[0],obj[1],obj[2],obj[3],obj[4]))
    f.write(tail)
 
 
def save_annotations_and_imgs(coco,dataset,filename,objs):
    anno_path=anno_dir+filename[:-3]+'xml'
    print('anno_path:%s'%anno_path)
    #img_path=dataDir+'/'+'images'+'/'+dataset+'/'+filename
    img_path=dataDir+'/'+dataset+'/'+filename
    print('img_path:%s'%img_path)
    print('step3-image-path-OK')
    dst_imgpath=img_dir+filename
 
    img=cv2.imread(img_path)
    '''if (img.shape[2] == 1):
        print(filename + " not a RGB image")     
        return''' 
    print('img_path:%s'%img_path)
    print('dst_imgpath:%s'%dst_imgpath)
    shutil.copy(img_path, dst_imgpath)
 
    head=headstr % (filename, img.shape[1], img.shape[0], img.shape[2])
    tail = tailstr
    write_xml(anno_path,head, objs, tail)
 
 
def showimg(coco,dataset,img,classes,cls_id,show=True):
    global dataDir
    #I=Image.open('%s/%s/%s/%s'%(dataDir,'images',dataset,img['file_name']))
    I=Image.open('%s/%s/%s'%(dataDir,dataset,img['file_name']))  
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None)
    anns = coco.loadAnns(annIds)
    objs = []
    for ann in anns:
        class_name=classes[ann['category_id']]
        if class_name in classes_names:
            print(class_name)
            if 'bbox' in ann:
                bbox=ann['bbox']
                xmin = int(bbox[0])
                ymin = int(bbox[1])
                xmax = int(bbox[2] + bbox[0])
                ymax = int(bbox[3] + bbox[1])
                obj = [class_name, xmin, ymin, xmax, ymax]
                objs.append(obj)
                #draw = ImageDraw.Draw(I)
                #draw.rectangle([xmin, ymin, xmax, ymax])
    # if show:
        # plt.figure()
        # plt.axis('off')
        # plt.imshow(I)
        # plt.show()
    return objs
 
for dataset in datasets_list:
    annFile='{}/annotations_1/instances_{}.json'.format(dataDir,dataset) 
    print('annFile:%s'%annFile)  
    coco = COCO(annFile)         
    '''
    loading annotations into memory...
    Done (t=0.81s)
    creating index...
    index created!
    '''
    classes = id2name(coco)                     
    print("classes:%s"%classes)
    classes_ids = coco.getCatIds(catNms=classes_names)
    print(classes_ids)
    for cls in classes_names:
        cls_id=coco.getCatIds(catNms=[cls])
        img_ids=coco.getImgIds(catIds=cls_id)
        print(cls,len(img_ids))
        # imgIds=img_ids[0:10]
        for imgId in tqdm(img_ids):
            img = coco.loadImgs(imgId)[0]
            filename = img['file_name']
            #print(filename)
            objs=showimg(coco, dataset, img, classes,classes_ids,show=False)  
            #print(objs)
            save_annotations_and_imgs(coco, dataset, filename, objs)

以上两处代码运行需要激活tensorflow环境，本人参考如下两篇博客：
https://blog.csdn.net/weixin_40109345/article/details/102738799
https://blog.csdn.net/HUGOPIGS/article/details/96134459

2 VOC数据集制作

将提取出的图片及xml标注文件加入自己制作的数据集后，用于制作VOC格式数据集。
具体过程可参考本人另一篇博客：https://blog.csdn.net/lishqia/article/details/108290827
json提取出来是xml格式的，yolov3中进行VOC数据集训练的时候还需要txt格式的标注文件，因此需要将每个xml格式标注文件都转化成txt文件并放入labels文件夹中存储，代码如下：

import os
import sys
import xml.etree.ElementTree as ET
import glob

def xml_to_txt(indir,outdir):

    os.chdir(indir)
    annotations = os.listdir('.')
    annotations = glob.glob(str(annotations)+'*.xml')

    for i, file in enumerate(annotations):

        file_save = file.split('.')[0]+'.txt'
        file_txt=os.path.join(outdir,file_save)
        f_w = open(file_txt,'w')

        # actual parsing
        in_file = open(file)
        tree=ET.parse(in_file)
        root = tree.getroot()

        for obj in root.iter('object'):
                current = list()
                name = obj.find('name').text
				
                #name1=name.encode('utf-8')
                f_w.write(str(name)+' ')  #name.encode("utf-8")
				
                xmlbox = obj.find('bndbox')
                xn = xmlbox.find('xmin').text
                xx = xmlbox.find('xmax').text
                yn = xmlbox.find('ymin').text
                yx = xmlbox.find('ymax').text
                #print xn
                f_w.write(xn+' '+yn+' '+xx+' '+yx+' '+'\n')


indir='/home/lsq/darknet/VOCdevkit/VOCdevkit/VOC2007/Annotations' 
outdir='/home/lsq/darknet/VOCdevkit/VOCdevkit/VOC2007/labels' 

xml_to_txt(indir,outdir)

3 单独说明

进行图片提取的时候，我提取了大约20类，提取之后发现将所有图片都提取出来了，提取的意义是什么呢？其实最开始的时候我是懒得提取的，想法是把coco数据集的80类物体加上自己制作的数据集中的大约20类物体放在一起一共100类全部用于训练，但是训练过程一直报错，cannot load image,无法加载图片，研究了两天排查各种原因，最后想到会不会是训练的类别太多了呢，试了一下只训练40类，还真的可以训练了。研究生小白，具体深层次的原因暂时没搞明白，至少现在可以训练了。有知道的大佬欢迎指教。