深度学习中经常需要用无标记的负样本参与训练,以降低目标检测中的误检率和误识别率。生成负样本 xml 文件的一种方法是:用 labelImg 在图像上随便画一个框生成 xml 文件,然后手动删除其中的 object 节点。但是当负样本很多时,这种方法太耗时间,所以我写了一个脚本,可以批量生成空的 xml 文件。此外,本博客还介绍了如何批量修改图像尺寸、文件名称和 xml 文件内容,这些操作也经常用到。代码如下,大家可以适当参考。小编水平有限,如有错误,麻烦各位告诉我一下哈!
一、批量修改图像尺寸
import os
import cv2
# Resize every image in path_ori to a fixed size, overwriting the original
# file in place.
path_ori = '/home/dulingwen/Music/jpg/'
filename = os.listdir(path_ori)

# Target size as (width, height); it does not change per image, so it is
# defined once outside the loop.
dim = (1920, 1080)

for fn in filename:
    img_file = os.path.join(path_ori, fn)
    img = cv2.imread(img_file)
    # cv2.imread does not raise on failure: it returns None for
    # sub-directories and for files it cannot decode. Passing None on to
    # cv2.resize would abort the whole batch, so such entries are skipped.
    if img is None:
        print('skip (not a readable image):', img_file)
        continue
    img_res = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
    cv2.imwrite(img_file, img_res)
二、批量修改文件名称(以把 xml 文件重命名为对应 jpg 文件的名称为例,不包含后缀)
import os
# Rename the xml files in path_mod so that the i-th xml file takes the base
# name (without extension) of the i-th image file in path_ori.
path_ori = '/home/dulingwen/Music/jpg/'
path_mod = '/home/dulingwen/Music/xml/'

# os.listdir returns entries in arbitrary order; both listings are sorted so
# the positional pairing below is deterministic from run to run.
file_ori = sorted(os.listdir(path_ori))
file_mod = sorted(os.listdir(path_mod))

if len(file_ori) != len(file_mod):
    # Pairing is purely positional, so a count mismatch means some files
    # have no partner; only the common prefix is processed.
    print('warning: file counts differ:', len(file_mod), 'xml vs',
          len(file_ori), 'jpg')

for fn, ref in zip(file_mod, file_ori):
    oldname = os.path.join(path_mod, fn)
    # splitext strips the extension whatever its length ('.jpg', '.jpeg', ...).
    newname = os.path.join(path_mod, os.path.splitext(ref)[0] + '.xml')
    if oldname == newname:
        continue
    # os.rename may silently replace an existing file, which would destroy
    # an xml file that has not been processed yet; refuse to overwrite.
    if os.path.exists(newname):
        print('skip (target already exists):', oldname, newname)
        continue
    os.rename(oldname, newname)
    print(oldname, newname)
三、批量修改xml文件各节点内容
import os
import xml.dom.minidom
import xml.etree.ElementTree
# Rewrite the filename, path, width and height nodes of every xml file in
# xmldir, saving each file back in place.
xmldir = '/home/dulingwen/Music/xml/'


def _set_node_text(root, tag, value):
    """Set the text of the first <tag> element under *root* to *value*.

    An element with no children (e.g. ``<path/>``) has ``firstChild`` equal
    to None, so a text node is created for it instead of being modified.
    Raises IndexError if no element named *tag* exists.
    """
    node = root.getElementsByTagName(tag)[0]
    if node.firstChild is None:
        node.appendChild(node.ownerDocument.createTextNode(value))
    else:
        node.firstChild.data = value


for xmlfile in os.listdir(xmldir):
    # Only parse xml files; anything else in the directory is left alone.
    if not xmlfile.lower().endswith('.xml'):
        continue
    xmlname = os.path.splitext(xmlfile)[0]
    xml_specific = os.path.join(xmldir, xmlfile)
    # read the xml file
    dom = xml.dom.minidom.parse(xml_specific)
    root = dom.documentElement
    # give the selected nodes their new values
    _set_node_text(root, 'filename', xmlname + '.jpg')
    _set_node_text(root, 'path', '/home/dulingwen/Music/jpg/' + xmlname + '.jpg')
    _set_node_text(root, 'width', '1920')
    _set_node_text(root, 'height', '1080')
    # Open the file as utf-8 and declare the same encoding in the XML header
    # so non-ASCII text survives regardless of the system locale.
    with open(xml_specific, 'w', encoding='utf-8') as fh:
        dom.writexml(fh, encoding='utf-8')
四、批量生成空的xml文件
import os
import xml.dom.minidom
# For every file in img_path, write an annotation xml file with no object
# nodes into xml_path, named after the image's base name.
img_path = '/home/dulingwen/Pictures/img/'
xml_path = '/home/dulingwen/Pictures/xml/'


def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def _build_empty_annotation(img_file):
    """Return a DOM document describing *img_file* with no object nodes.

    The root node is ``annotation``; its children are folder, filename,
    path, source/database, size/(width, height, depth) and segmented, in
    that order.
    """
    doc = xml.dom.minidom.Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', 'VOC2012')
    _append_text_element(doc, annotation, 'filename', img_file)
    _append_text_element(doc, annotation, 'path',
                         os.path.join(img_path, img_file))

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'Unknown')

    size = doc.createElement('size')
    annotation.appendChild(size)
    _append_text_element(doc, size, 'width', '1920')
    _append_text_element(doc, size, 'height', '1080')
    _append_text_element(doc, size, 'depth', '3')

    _append_text_element(doc, annotation, 'segmented', '0')
    return doc


for img_file in os.listdir(img_path):
    img_name = os.path.splitext(img_file)[0]
    doc = _build_empty_annotation(img_file)
    xml_file = os.path.join(xml_path, '%s.xml' % img_name)
    # open() in write mode already creates the file, so no os.mknod call is
    # needed; os.mknod is Unix-only and fails with FileExistsError when the
    # script is run a second time. The with-block closes the file even if
    # writexml raises, and the file encoding matches the declared one.
    with open(xml_file, 'w', encoding='utf-8') as fp:
        doc.writexml(fp, indent='\t', addindent='\t', newl='\n', encoding='utf-8')