提取voc中需要的类别以及修改类名称

最新推荐文章于 2024-04-13 19:57:22 发布

tian-happy

最新推荐文章于 2024-04-13 19:57:22 发布

阅读量2k

点赞数 3

分类专栏：代码　　文章标签：python、VOC数据集

代码专栏收录该内容

12 篇文章

订阅专栏

1.提取需要的类别

https://blog.csdn.net/qq_35239859/article/details/88787554


import os
import shutil
# Extract one class ("person") from a VOC2007 dataset.
#
# For every annotation file, keep only the <object> blocks whose <name> line is
# the target class, write the reduced annotation to `ann_savepath`, and copy the
# matching .jpg into `img_savepath`. Annotations without a target object produce
# no output at all.
#
# NOTE: the annotations are processed as plain text lines, so they must use the
# standard VOC layout: "\t<object>", "\t</object>" and "\t\t<name>...</name>"
# each alone on a line with exactly that tab indentation.

# Source and destination directories (edit these for your machine).
ann_filepath = 'C:\\Users\\Administrator\\Desktop\\VOC2007\\Annotations\\'
img_filepath = 'C:\\Users\\Administrator\\Desktop\\VOC2007\\JPEGImages\\'
img_savepath = 'C:\\Users\\Administrator\\Desktop\\VOC2007\\JPEGImages_person\\'
ann_savepath = 'C:\\Users\\Administrator\\Desktop\\VOC2007\\Annotations_person\\'

# Create the output directories if they do not exist yet.
os.makedirs(img_savepath, exist_ok=True)
os.makedirs(ann_savepath, exist_ok=True)

# The 20 VOC class names, kept for reference only; the class that is actually
# extracted is decided by `target_name_line` below.
classes = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
           'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
           'dog', 'horse', 'motorbike', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor', 'person']

# Exact text lines used to recognise object blocks and the wanted class.
object_open_line = "\t<object>\n"
object_close_line = "\t</object>\n"
target_name_line = '\t\t<name>person</name>\n'

for file in os.listdir(ann_filepath):
    print(file)
    with open(os.path.join(ann_filepath, file)) as fp:
        lines = fp.readlines()

    # Line indices of every opening / closing <object> tag.
    ind_start = [idx for idx, line in enumerate(lines) if line == object_open_line]
    ind_end = [idx for idx, line in enumerate(lines) if line == object_close_line]

    # No objects at all: nothing to extract (previously an IndexError).
    if not ind_start:
        continue

    # Opening and closing tags are paired by position, so unequal counts would
    # produce malformed XML (a "mismatched tag" error when parsed later).
    if len(ind_start) != len(ind_end):
        print('skipping %s: unbalanced <object> tags' % file)
        continue

    # Keep the object blocks (inclusive of both tags) that name the target class.
    kept_blocks = []
    for start, end in zip(ind_start, ind_end):
        block = lines[start:end + 1]
        if target_name_line in block:
            kept_blocks.append(block)

    ann_savefile = os.path.join(ann_savepath, file)
    if not kept_blocks:
        # No wanted object: make sure no (possibly stale) output is left behind.
        if os.path.exists(ann_savefile):
            os.remove(ann_savefile)
        continue

    # Output = XML header (everything before the first object) + kept object
    # blocks + the last line of the file (the closing root tag).
    output_lines = lines[:ind_start[0]]
    for block in kept_blocks:
        output_lines += block
    output_lines.append(lines[-1])

    with open(ann_savefile, 'w') as fp_w:
        fp_w.writelines(output_lines)

    # Copy the image that belongs to this annotation.
    name_img = os.path.join(img_filepath, os.path.splitext(file)[0] + ".jpg")
    shutil.copy(name_img, img_savepath)

问题：

Traceback (most recent call last):
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35-32\xml_name.py", line 31, in <module>
changexml(inputpath)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35-32\xml_name.py", line 10, in changexml
tree = ET.parse(file)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35-32\lib\xml\etree\ElementTree.py", line 1195, in parse
tree.parse(source, parser)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35-32\lib\xml\etree\ElementTree.py", line 596, in parse
self._root = parser._parse_whole(source)
File "<string>", line None
xml.etree.ElementTree.ParseError: mismatched tag: line 53, column 3

原因：

被解析的 xml 文件中，某个元素的起始标签与结束标签不匹配（标签没有成对出现），导致 ElementTree 解析失败。可根据报错信息（line 53, column 3）打开对应的 xml 文件，检查该位置附近的标签是否成对。

2.修改xml中某类的名称

import os
import xml.etree.ElementTree as ET

def changelabelname(inputpath: str, old_name: str = 'person',
                    new_name: str = 'headshoulder') -> None:
    """Rename a class label in every VOC annotation XML file of a directory.

    Each file in ``inputpath`` whose name ends with ``.xml`` is parsed; every
    ``<name>`` element that is a direct child of an ``<object>`` element and
    whose text equals ``old_name`` is changed to ``new_name``. A file is
    rewritten in place (UTF-8 encoded) only if at least one label changed.

    Args:
        inputpath: Directory containing the annotation XML files.
        old_name: Label text to look for.
        new_name: Label text to write instead.

    Raises:
        xml.etree.ElementTree.ParseError: If a file is not well-formed XML,
            e.g. when opening and closing tags do not match.
    """
    for filename in os.listdir(inputpath):
        if not filename.endswith('.xml'):
            continue
        path = os.path.join(inputpath, filename)
        tree = ET.parse(path)

        modified = False
        for obj in tree.getroot().findall('object'):
            for name_node in obj.findall('name'):
                if name_node.text == old_name:
                    name_node.text = new_name
                    modified = True

        # Write once per file, and only when something actually changed.
        # encoding='utf-8' is passed explicitly so the file is always saved as UTF-8.
        if modified:
            tree.write(path, encoding='utf-8')


if __name__ == '__main__':
    # Replace with the path to your own annotation directory.
    inputpath = 'C:\\Users\\Administrator\\Desktop\\Annotations_person\\'
    changelabelname(inputpath)