import os.path
import shutil
import cv2
import xml.etree.ElementTree as ET
import pdb
import os
import math
def deletesinglefile(xmlpath, picpath, delclass, new_xmlpath, new_picpath):
    """Strip objects of the given class(es) from every annotation in ``xmlpath``.

    For each directory entry the XML is parsed, every element whose tag
    contains ``object`` and that has a ``name``-like child matching
    ``delclass`` is removed from the root, the annotation and the image named
    by its ``filename`` element are copied into ``new_xmlpath`` /
    ``new_picpath``, and the pruned tree is written back over the source
    annotation.

    The copy is made *before* the rewrite, so the file placed in
    ``new_xmlpath`` is the annotation as it was before pruning.

    NOTE(review): the copy step runs once for every annotation, whether or
    not anything was removed; confirm this is the intended placement.

    :param xmlpath: directory holding the annotation files
    :param picpath: directory holding the images
    :param delclass: class name, or a collection of class names, to remove
    :param new_xmlpath: existing directory that receives the annotation copies
    :param new_picpath: existing directory that receives the image copies
    :return: None
    """
    # A bare string would turn ``in`` into a substring test ('good' in 'goods'),
    # so wrap it; any other container is used as given.
    targets = (delclass,) if isinstance(delclass, str) else delclass

    for annoName in os.listdir(xmlpath):
        try:
            annofile = os.path.join(xmlpath, annoName)
            print(annoName)
            print("annofile:", annofile)
            if not os.path.exists(annofile):
                raise FileNotFoundError(annofile)

            tree = ET.parse(annofile)
            root = tree.getroot()
            picName = tree.find("filename").text
            picFile = os.path.join(picpath, picName)
            print("picFile:", picFile)

            # Snapshot the tree first: removing while walking it would skip nodes.
            for node in list(root.iter()):
                if 'object' not in node.tag:
                    continue
                # any() removes each object at most once, even if several of
                # its children match.
                if any('name' in child.tag and child.text in targets
                       for child in node):
                    root.remove(node)

            # Keep a copy of the annotation and its image, then save the change.
            new_full_picpath = os.path.join(new_picpath, picName)
            new_full_xmlpath = os.path.join(new_xmlpath, annoName)
            print("annofile:", annofile)
            print("new_full_xmlpath:", new_full_xmlpath)
            print("picFile:", picFile)
            print("annofinew_full_picpathle:", new_full_picpath)
            shutil.copy(annofile, new_full_xmlpath)
            shutil.copy(picFile, new_full_picpath)
            print(os.path.basename(annofile) + ' have something deleted')

            tree.write(annofile, encoding="utf-8", xml_declaration=True)
        except Exception as e:
            # Best effort: report the problem and carry on with the next file.
            print(e)
def selectObjectFile(xmlpath, picpath, delclass, new_xmlpath, new_picpath):
    """Copy every annotation that contains a wanted class, plus its image.

    For each directory entry in ``xmlpath`` the XML is parsed and scanned for
    an element whose tag contains ``object`` and that has a ``name``-like
    child matching ``delclass``.  On a match the annotation is copied to
    ``new_xmlpath`` and the image named by the last ``/``-separated component
    of its ``filename`` element is copied from ``picpath`` to ``new_picpath``.

    :param xmlpath: directory holding the annotation files
    :param picpath: directory holding the images
    :param delclass: class name, or a collection of class names, to select
    :param new_xmlpath: existing directory that receives matching annotations
    :param new_picpath: existing directory that receives matching images
    :return: None
    """
    # A bare string would turn ``in`` into a substring test ('good' in 'goods'),
    # so wrap it; any other container is used as given.
    wanted = (delclass,) if isinstance(delclass, str) else delclass

    for annoName in os.listdir(xmlpath):
        try:
            annofile = os.path.join(xmlpath, annoName)
            if not os.path.exists(annofile):
                raise FileNotFoundError(annofile)

            tree = ET.parse(annofile)
            root = tree.getroot()
            # Only the final path component of <filename> is used.
            picName = tree.find("filename").text.split('/')[-1]
            picFile = os.path.join(picpath, picName)
            print("picFile:", picFile)

            # One matching object is enough; any() stops at the first hit.
            has_wanted = any(
                'name' in child.tag and child.text in wanted
                for node in root.iter() if 'object' in node.tag
                for child in node
            )
            if has_wanted:
                new_full_picpath = os.path.join(new_picpath, picName)
                new_full_xmlpath = os.path.join(new_xmlpath, annoName)
                print("annofile:", annofile)
                print("new_full_xmlpath:", new_full_xmlpath)
                print("picFile:", picFile)
                print("annofinew_full_picpathle:", new_full_picpath)
                shutil.copy(annofile, new_full_xmlpath)
                shutil.copy(picFile, new_full_picpath)
                print(os.path.basename(annofile) + ' have something move')

            # NOTE(review): the source annotation is re-serialised in place even
            # though this function never modifies the tree; confirm whether
            # that rewrite is still wanted.
            tree.write(annofile, encoding="utf-8", xml_declaration=True)
        except Exception as e:
            # Best effort: report the problem and carry on with the next file.
            print(e)
def deleteObejcet(xmlpath, picpath, min_aspect_ratio=0.2):
    """Remove degenerate bounding boxes from every annotation in ``xmlpath``.

    An ``object`` element is removed from the annotation root when its
    ``bndbox`` has zero width or zero height, or when height/width or
    width/height is below ``min_aspect_ratio``.  Coordinates are read as
    floats and truncated to integers before the checks.  Each annotation is
    then written back in place.

    :param xmlpath: directory holding the annotation files
    :param picpath: accepted for signature compatibility; not used here
    :param min_aspect_ratio: smallest height/width and width/height ratio a
        box may have and still be kept (defaults to 0.2)
    :return: None
    """
    for annoName in os.listdir(xmlpath):
        try:
            annofile = os.path.join(xmlpath, annoName)
            print(annoName)
            print("annofile:", annofile)
            if not os.path.exists(annofile):
                raise FileNotFoundError(annofile)

            tree = ET.parse(annofile)
            root = tree.getroot()
            # The file name is only logged, so a missing element is not fatal.
            print("picFile:", tree.findtext("filename"))

            # Iterate over a snapshot: removing from the live iterator skips
            # the sibling that follows each removed object.
            for obj in list(root.iter('object')):
                box = obj.find('bndbox')
                xmin = int(float(box.find('xmin').text))
                xmax = int(float(box.find('xmax').text))
                ymin = int(float(box.find('ymin').text))
                ymax = int(float(box.find('ymax').text))
                width = xmax - xmin
                height = ymax - ymin

                # The zero checks come first so the divisions are never
                # reached with a zero denominator.
                if (width == 0 or height == 0
                        or height / width < min_aspect_ratio
                        or width / height < min_aspect_ratio):
                    print("xmin:", xmin, "xmax:", xmax, "ymin:", ymin, "ymax:", ymax)
                    root.remove(obj)

            # Save the change.
            tree.write(annofile, encoding="utf-8", xml_declaration=True)
        except Exception as e:
            # Best effort: report the problem and carry on with the next file.
            print(e)
def each_file(imgPath, xmlPath, new_imgPath, new_xmlPath):
    '''
    Copy every ``.jpg`` in ``imgPath`` that has a matching annotation,
    together with that annotation.

    For an image ``<stem>.jpg`` the annotation looked up is
    ``<xmlPath>/<stem>.xml``.  Entries with any other extension, and images
    without an annotation file, are skipped.  Sub-directories are not
    descended into.

    :param imgPath: directory holding the source images
    :param xmlPath: directory holding the source annotations
    :param new_imgPath: existing directory that receives the image copies
    :param new_xmlPath: existing directory that receives the annotation copies
    :return: None
    '''
    for img_name in os.listdir(imgPath):
        # Split on the real extension so a "jpg" elsewhere in the name, or
        # extra dots in the stem, cannot produce a wrong annotation name.
        stem, ext = os.path.splitext(img_name)
        if ext != '.jpg':
            continue

        xml_name = stem + '.xml'
        full_xmlPath = os.path.join(xmlPath, xml_name)
        if not os.path.isfile(full_xmlPath):
            continue

        shutil.copy(full_xmlPath, os.path.join(new_xmlPath, xml_name))
        shutil.copy(os.path.join(imgPath, img_name),
                    os.path.join(new_imgPath, img_name))
def create_dir_not_exist(path):
    """Create directory ``path`` unless something already exists there.

    Missing parent directories are created as well, and a directory that
    appears between the existence check and the creation is tolerated.

    :param path: directory to create
    :return: None
    """
    if not os.path.exists(path):
        os.makedirs(path, exist_ok=True)
if __name__ == '__main__':
    # Source and destination directories all live under one dataset root.
    allPath = '/home/hs/data/data_jiaxingyili/goods/smalldeal'
    img_path = allPath + '/imgc'
    xml_path = allPath + '/xmlc'
    new_img_path = allPath + '/imgs'
    new_xml_path = allPath + '/xmls'
    # Alternative dataset layout:
    # xml_path = "/home/hs/data/0_paimian/sku/annotations3"
    # img_path = "/home/hs/data/0_paimian/sku/images3"
    # new_img_path = "/home/hs/data/0_paimian/sku/img"
    # new_xml_path = "/home/hs/data/0_paimian/sku/xml"

    # The copy helpers expect their destination directories to exist.
    create_dir_not_exist(new_img_path)
    create_dir_not_exist(new_xml_path)

    # Enable exactly one of the passes below.
    # each_file(img_path, xml_path, new_img_path, new_xml_path)
    # deletesinglefile(xml_path, img_path, 'goods', new_xml_path, new_img_path)
    # deleteObejcet(xml_path, img_path)
    selectObjectFile(xml_path, img_path, 'goods', new_xml_path, new_img_path)
# 09-15