一、修改xml中标签
import os
import shutil
from tqdm import tqdm
import xml.etree.ElementTree as ET
from loguru import logger
@logger.catch()
def datasets_info(source='./train/temp'):
    """Print how many XML files exist under *source* and a per-label box count.

    Every ``.xml`` file found by walking *source* is parsed, and the text of
    the ``<name>`` child of each top-level ``<object>`` element is tallied.

    Args:
        source: Directory to scan recursively. Defaults to the path that was
            previously hard-coded.
    """
    instance = {}
    all_xml = 0
    for root1, _dirs, files in os.walk(source):
        for file in files:
            if not file.endswith('.xml'):
                continue
            all_xml += 1
            path = os.path.join(root1, file)
            for obj in ET.parse(path).getroot().findall('object'):
                name_node = obj.find('name')
                if name_node is None:
                    # An <object> without <name> used to raise AttributeError
                    # and abort the whole scan; skip it instead.
                    continue
                name = name_node.text
                instance[name] = instance.get(name, 0) + 1
    print('total: %d' % all_xml)
    print('示例框:', instance)
def modify_label(source='./train/temp', old_name='front_whee',
                 new_name='front_wheel'):
    """Rename a label in every XML annotation file under *source*.

    Each ``.xml`` file found by walking *source* is parsed; every top-level
    ``<object>`` whose ``<name>`` text equals *old_name* is renamed to
    *new_name*. A file is rewritten only if at least one object changed.

    Args:
        source: Directory to scan recursively.
        old_name: Label text to look for (the defaults fix a known typo).
        new_name: Replacement label text.
    """
    for root, _dirs, files in os.walk(source):
        for file in files:
            if not file.endswith('.xml'):
                continue
            fp = os.path.join(root, file)
            tree = ET.parse(fp)
            changed = False
            for obj in tree.getroot().findall('object'):
                name_node = obj.find('name')
                # Objects without a <name> child cannot match; skip them
                # rather than crashing on ``None.text``.
                if name_node is None or name_node.text != old_name:
                    continue
                print('label modify: ', fp)
                name_node.text = new_name
                changed = True
            if changed:
                # The default encoding is US-ASCII, which turns non-ASCII text
                # (e.g. Chinese file names) into numeric character references.
                # Writing UTF-8 keeps that text readable.
                tree.write(fp, encoding='utf-8')
# Script entry point: run the label fix-up when executed directly.
if __name__ == "__main__":
    modify_label()
二、移动文件以及删除空标签的xml
1、从 CVAT 下载数据并解压后(例如 合肥机场加油.zip),xml 文件位于多层嵌套目录中,例如:
合肥机场加油/Annotations/合肥机场加油/JPEGImages/05号停机位枪机_4_0003306.xml
需要将 xml 等文件移动到 “合肥机场加油/Annotations/” 下(jpg 同理,移动到最外层的 JPEGImages/ 下)
2、删除没有任何 object 标注的 xml(同时删除对应的 jpg)
import os
import shutil
import xml.etree.ElementTree as ET
from loguru import logger
def _hoist_file(root, file, marker, removed_msg):
    """Move ``root/file`` up into the outermost *marker* directory of its path.

    Does nothing when *root* has no path component named *marker*, when the
    file already sits directly in that directory, or when a file of the same
    name already exists at the destination. After a move, directories between
    the old location and the marker directory are removed while they are
    empty.
    """
    dirs = os.path.normpath(root).split(os.sep)
    if marker not in dirs:
        return
    idx = dirs.index(marker)
    if idx == len(dirs) - 1:
        # Already directly inside the marker directory (e.g. on a re-run).
        return
    target_dir = os.sep.join(dirs[:idx + 1])
    dest = os.path.join(target_dir, file)
    if os.path.exists(dest):
        print('skip, already exists: ', dest)
        return
    shutil.move(os.path.join(root, file), dest)

    # Remove only directories that are actually empty, walking upwards.
    # Deleting the whole nested tree as soon as one leaf directory empties
    # could destroy files in sibling folders that have not been moved yet.
    current = os.sep.join(dirs)
    while current != target_dir and not os.listdir(current):
        os.rmdir(current)
        print(removed_msg, current)
        current = os.path.dirname(current)


def move_file(source='/media/deepstream/DATA/pan/hefei/2'):
    """Flatten nested export folders under *source*.

    ``.jpg`` files are moved up into the outermost ``JPEGImages`` directory on
    their path and ``.xml`` files into the outermost ``Annotations``
    directory. Nested directories left empty are removed. Files that are not
    below such a directory, or that are already in place, are left untouched,
    so the function can be re-run safely.

    Args:
        source: Directory to scan recursively. Defaults to the path that was
            previously hard-coded.
    """
    for root, _dirs, files in os.walk(source):
        for file in files:
            if file.endswith('.jpg'):
                _hoist_file(root, file, 'JPEGImages', 'remove jpg: ')
            if file.endswith('.xml'):
                _hoist_file(root, file, 'Annotations', 'remove xml: ')
@logger.catch()
def del_null_xml(source='./train/temp'):
    """Delete annotation files with no ``<object>`` and their paired images.

    For every ``.xml`` under *source* whose root has no ``<object>`` child,
    the XML is removed together with the image of the same base name. The
    image is looked up in the directory obtained by replacing ``Annotations``
    with ``JPEGImages`` in the XML's directory path. The number of deleted
    XML files is logged per directory.

    Args:
        source: Directory to scan recursively. Defaults to the path that was
            previously hard-coded.
    """
    for root, _dirs, files in os.walk(source):
        temp = []
        jpg_dir = root.replace('Annotations', 'JPEGImages')
        for file in files:
            if not file.endswith('.xml'):
                continue
            xml_path = os.path.join(root, file)
            if ET.parse(xml_path).getroot().findall('object'):
                continue
            # Swap only the extension. A blanket replace of 'xml' also
            # rewrote any directory or file name that contains "xml".
            jpg_name = os.path.join(jpg_dir, os.path.splitext(file)[0] + '.jpg')
            temp.append(file)
            logger.info(file)
            try:
                os.remove(jpg_name)
            except FileNotFoundError:
                # A missing image must not stop the empty XML from being
                # removed, nor abort the rest of the scan.
                logger.warning('image not found: {}'.format(jpg_name))
            os.remove(xml_path)
        logger.info('{} files num: {}'.format(root, len(temp)))
# Script entry point: flatten the nested export folders when run directly.
if __name__ == "__main__":
    move_file()
三、根据xml文件打mosaic
import os
import xml.etree.ElementTree as ET
from PIL import Image
from tqdm import tqdm
def _mosaic(img):
    """Shrink *img* to a single pixel, then scale it back to its original size.

    Returns the rescaled image; *img* itself is not modified here.
    """
    original_size = img.size
    return img.resize((1, 1)).resize(original_size)
def mosaic(img, fx, fy, tx, ty):
    """Obscure the box ``(fx, fy, tx, ty)`` of *img* in place and return *img*.

    The region is cropped, passed through ``_mosaic`` and pasted back at the
    same coordinates.
    """
    box = (fx, fy, tx, ty)
    img.paste(_mosaic(img.crop(box)), box)
    return img
def read_xml(xml_path, f_path, source_dist):
    """Blur every ``other`` object of one annotated image and drop its label.

    Top-level ``<object>`` elements whose ``<name>`` text contains ``'other'``
    are obscured in the image via ``mosaic`` and removed from the XML tree.
    If at least one object matched, the image is saved into *source_dist*
    under its original file name and the XML is rewritten in place. If none
    matched, neither file is touched and the image is not opened.

    Args:
        xml_path: Path of the annotation file.
        f_path: Path of the image the annotation belongs to.
        source_dist: Directory the modified image is saved into.

    Raises:
        AttributeError: If a matching object lacks ``bndbox`` or one of its
            ``xmin``/``ymin``/``xmax``/``ymax`` children.
    """
    tree = ET.parse(xml_path)
    root = tree.getroot()
    # A missing <name> is treated as "no match" instead of crashing.
    targets = [
        obj for obj in root.findall('object')
        if 'other' in (obj.findtext('name') or '')
    ]
    if not targets:
        return

    # Open the image only when there is work to do, and close it afterwards.
    with Image.open(f_path) as opened:
        canvas = opened
        for obj in targets:
            # Boxes are parsed for matching objects only, so other objects
            # that lack a bndbox no longer break the file.
            bndbox = obj.find('bndbox')
            fx, fy, tx, ty = (
                round(float(bndbox.find(tag).text))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
            canvas = mosaic(canvas, fx, fy, tx, ty)
            root.remove(obj)
        canvas.save(os.path.join(source_dist, os.path.basename(f_path)))
    # UTF-8 keeps non-ASCII text readable; the US-ASCII default would turn
    # it into numeric character references.
    tree.write(xml_path, encoding='utf-8')
    print(xml_path)
def mosaic_main(source='./train/temp'):
    """Run ``read_xml`` on every annotation under *source* that has an image.

    For each ``.xml`` file, the image is expected in the directory obtained by
    replacing ``Annotations`` with ``JPEGImages`` in the XML's directory path,
    under the same base name with a ``.jpg`` extension. Annotations whose
    image does not exist are reported and skipped.

    Args:
        source: Directory to scan recursively. Defaults to the path that was
            previously hard-coded.
    """
    for root, _dirs, files in os.walk(source):
        img_dir = root.replace('Annotations', 'JPEGImages')
        for file in files:
            if not file.endswith('.xml'):
                continue
            xml_path = os.path.join(root, file)
            # Swap only the extension. A blanket replace of 'xml' also
            # rewrote any directory or file name that contains "xml".
            f_path = os.path.join(img_dir, os.path.splitext(file)[0] + '.jpg')
            # Check the image, not the XML: the XML was just listed by
            # os.walk, so testing it could never be False.
            if not os.path.exists(f_path):
                print('image not found, skip: ', f_path)
                continue
            read_xml(xml_path, f_path, img_dir)
# Script entry point: blur and strip 'other' objects when run directly.
if __name__ == "__main__":
    mosaic_main()