1.提取需要的类别
https://blog.csdn.net/qq_35239859/article/details/88787554
import os
import shutil
# Source VOC2007 directories: XML annotations and the JPEG images they describe.
ann_filepath = 'C:\\Users\\Administrator\\Desktop\\VOC2007\\Annotations\\'
img_filepath = 'C:\\Users\\Administrator\\Desktop\\VOC2007\\JPEGImages\\'
# Output directories that receive the filtered annotations and their images.
img_savepath = 'C:\\Users\\Administrator\\Desktop\\VOC2007\\JPEGImages_person\\'
ann_savepath = 'C:\\Users\\Administrator\\Desktop\\VOC2007\\Annotations_person\\'

# exist_ok=True makes creation idempotent without a separate exists() check,
# so rerunning the script on an already prepared folder is not an error.
os.makedirs(img_savepath, exist_ok=True)
os.makedirs(ann_savepath, exist_ok=True)
# VOC2007 class names, kept as a reference when editing wanted_classes below.
classes = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
           'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
           'dog', 'horse', 'motorbike', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor', 'person']
# Classes whose <object> blocks are copied to the output annotations.
wanted_classes = ['person']


def _object_class(block):
    """Return the class named by the first <name> line of an object block.

    Assumes the object's own <name> line comes before the <name> lines of any
    nested element (e.g. <part>) -- confirm against your annotation files.
    Returns None when the block contains no <name> line.
    """
    for line in block:
        text = line.strip()
        if text.startswith('<name>') and text.endswith('</name>'):
            return text[len('<name>'):-len('</name>')].strip()
    return None


def _filter_objects(lines, wanted):
    """Drop every <object> block whose class is not in ``wanted``.

    ``lines`` is the annotation file as a list of lines. A block starts at a
    line whose stripped text is ``<object>`` and ends at the next line whose
    stripped text is ``</object>``; scanning sequentially keeps each opening
    tag paired with its own closing tag. Lines outside any block are always
    kept. A block that is never closed is discarded.

    Returns ``(kept_lines, kept_count)`` where ``kept_count`` is the number of
    object blocks that were kept.
    """
    kept_lines = []
    kept_count = 0
    block = None  # lines of the <object> block currently being collected
    for line in lines:
        text = line.strip()
        if block is None:
            if text == '<object>':
                block = [line]
            else:
                kept_lines.append(line)
            continue
        block.append(line)
        if text == '</object>':
            if _object_class(block) in wanted:
                kept_lines.extend(block)
                kept_count += 1
            block = None
    return kept_lines, kept_count


for file in os.listdir(ann_filepath):
    # Only annotation files are processed; anything else in the folder is skipped.
    if not file.lower().endswith('.xml'):
        continue
    print(file)
    with open(os.path.join(ann_filepath, file)) as fp:
        lines = fp.readlines()

    kept_lines, kept_count = _filter_objects(lines, wanted_classes)
    ann_savefile = os.path.join(ann_savepath, file)

    if kept_count == 0:
        # No wanted object: make sure no annotation is left for this image,
        # including one written by an earlier run.
        if os.path.exists(ann_savefile):
            os.remove(ann_savefile)
        continue

    with open(ann_savefile, 'w') as fp_w:
        fp_w.writelines(kept_lines)

    # Copy the image that belongs to the annotation that was kept.
    name_img = os.path.join(img_filepath, os.path.splitext(file)[0] + ".jpg")
    shutil.copy(name_img, img_savepath)
问题:
Traceback (most recent call last):
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35-32\xml_name.py", line 31, in <module>
changexml(inputpath)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35-32\xml_name.py", line 10, in changexml
tree = ET.parse(file)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35-32\lib\xml\etree\ElementTree.py", line 1195, in parse
tree.parse(source, parser)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35-32\lib\xml\etree\ElementTree.py", line 596, in parse
self._root = parser._parse_whole(source)
File "<string>", line None
xml.etree.ElementTree.ParseError: mismatched tag: line 53, column 3
原因:
xml 文件中类别(object)的起止标签不匹配(例如 <object> 与 </object> 没有成对出现),ElementTree 解析到第 53 行第 3 列时抛出 ParseError;请检查报错位置附近的标签是否成对。
2.修改xml中某类的名称
import os
import xml.etree.ElementTree as ET
def changelabelname(inputpath, old_name='person', new_name='headshoulder'):
    """Batch-rename a label in the VOC XML annotation files under ``inputpath``.

    Every ``.xml`` file directly inside ``inputpath`` is parsed; each
    ``<object>/<name>`` element whose text equals ``old_name`` is changed to
    ``new_name``. A file is rewritten in place, once, only if something in it
    changed. Other files in the directory are ignored.

    Args:
        inputpath: Directory containing the annotation files.
        old_name: Label text to look for.
        new_name: Label text to write instead.

    Raises:
        xml.etree.ElementTree.ParseError: If a file is not well-formed XML.
            The offending path is printed first, because the exception itself
            does not carry the file name.
    """
    for filename in os.listdir(inputpath):
        if not filename.endswith('.xml'):
            continue
        path = os.path.join(inputpath, filename)
        try:
            tree = ET.parse(path)
        except ET.ParseError as err:
            print('Malformed XML in %s: %s' % (path, err))
            raise

        changed = False
        for obj in tree.getroot().findall('object'):
            for name_node in obj.findall('name'):
                if name_node.text == old_name:
                    name_node.text = new_name
                    changed = True

        if changed:
            # Written as UTF-8 so non-ASCII characters in the original file
            # are not garbled.
            tree.write(path, encoding='utf-8')
if __name__ == '__main__':
    # Replace this with the directory that holds your own annotation files.
    inputpath = 'C:\\Users\\Administrator\\Desktop\\Annotations_person\\'
    changelabelname(inputpath)