提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
前言
提示:本文主要介绍如何用 Python 操作 XML 文件。
最近在训练模型时遇到了很多与 XML 相关的操作,为了以后省事,在此记录相关脚本。
import traceback
import xml.etree.ElementTree as ET
import os.path as osp
import os
import shutil
from pathlib import Path
from tqdm import tqdm
一、修改xml里的图片名和label
def rename_xml(xml_path, new_name, new_Annotations):
    """Rewrite one annotation file under a new image name, filtering labels.

    The ``filename`` element is set to ``new_name`` and ``folder`` to
    ``'JPEGImages'``.  Each direct ``object`` child of the root is then
    handled according to its ``name`` text:

    * ``person`` / ``headshoulder`` -- kept unchanged;
    * ``head_shoulder`` -- kept, renamed to ``headshoulder``;
    * anything else -- removed from the tree (the label is printed).

    If at least one object remains, the tree is written into
    ``new_Annotations`` using the stem of ``new_name`` plus ``.xml``;
    otherwise ``'no labels !'`` is printed and nothing is written.

    Args:
        xml_path: Path of the annotation file to read.
        new_name: New image file name, e.g. ``'000001.jpg'``.
        new_Annotations: Directory that receives the rewritten file.

    Returns:
        None.  Any failure is reported (traceback, then ``xml_path``)
        instead of being raised, so a batch run keeps going.
    """
    try:
        # Given a path, ET.parse opens and closes the file itself, so no
        # handle is leaked.
        tree = ET.parse(xml_path)
        root = tree.getroot()
        root.find('filename').text = new_name
        root.find('folder').text = 'JPEGImages'

        # findall() returns a list snapshot; removing children while walking
        # root.iter() would skip the sibling that follows each removed node.
        for obj in root.findall('object'):
            name_node = obj.find('name')
            label = name_node.text
            if label in ('person', 'headshoulder'):
                continue
            if label == 'head_shoulder':
                name_node.text = 'headshoulder'
            else:
                root.remove(obj)
                print(label)

        if root.find('object') is not None:
            # Swap only the extension; str.replace would also rewrite a
            # 'jpg' that happens to appear inside the stem.
            out_name = osp.splitext(new_name)[0] + '.xml'
            tree.write(osp.join(new_Annotations, out_name))
        else:
            print('no labels !')
    except Exception:
        # Best effort: report which file failed and why, then carry on.
        traceback.print_exc()
        print(xml_path)
二、删除xml里的多余label
def delete_label(xml_path, labels, save_dir='/temp'):
    """Strip unwanted objects from an annotation file and save it if complete.

    Each direct ``object`` child of the root is handled by its ``name`` text:
    a label containing ``'shoulder'`` is renamed to ``headshoulder``,
    ``person`` is kept, and every other object is removed.  The cleaned tree
    is written to ``save_dir`` (under the same file name) only when both
    classes, ``headshoulder`` and ``person``, are present.

    Args:
        xml_path: Path of the annotation file to read.
        labels: Returned unchanged when the file contains no objects.
        save_dir: Directory for the cleaned files; created if missing.
            Defaults to ``'/temp'``, the location that used to be hard-coded.

    Returns:
        ``labels`` if the file has no ``object`` elements; otherwise a list
        of the distinct kept class names (order unspecified).  ``None`` if
        parsing or processing failed -- the traceback and ``xml_path`` are
        printed in that case.

    Raises:
        OSError: If ``xml_path`` cannot be opened.
    """
    # Binary mode lets the XML parser honour the encoding declared in the
    # file; the ``with`` block closes the handle on every exit path.
    with open(xml_path, 'rb') as in_file:
        try:
            tree = ET.parse(in_file)
            root = tree.getroot()
            objs = root.findall('object')
            if not objs:
                return labels

            kept = set()
            for obj in objs:
                name_node = obj.find('name')
                label = name_node.text
                if 'shoulder' in label:
                    name_node.text = 'headshoulder'
                    # Record the normalised name so that spelling variants
                    # count as one class in the "both present" check below.
                    kept.add('headshoulder')
                elif label == 'person':
                    kept.add(label)
                else:
                    root.remove(obj)

            os.makedirs(save_dir, exist_ok=True)
            if len(kept) == 2:
                tree.write(Path(save_dir) / Path(xml_path).name)
            return list(kept)
        except Exception:
            # Best effort: report the failing file and keep the batch going.
            traceback.print_exc()
            print(xml_path)
            return None
def run(root='/media/VOC2012'):
    """Clean every annotation that has a matching image, then count the output.

    For each file listed in ``<root>/JPEGImages`` the annotation with the
    same stem in ``<root>/Annotations`` is passed to ``delete_label`` if it
    exists.  Afterwards the number of entries in the output directory is
    printed.

    Args:
        root: Dataset root containing ``JPEGImages`` and ``Annotations``.
    """
    jpeg_dir = osp.join(root, 'JPEGImages')
    ann_dir = osp.join(root, 'Annotations')
    labels = []
    for jpg_file in tqdm(os.listdir(jpeg_dir)):
        # Swap only the extension; str.replace would also rewrite a 'jpg'
        # that happens to appear inside the stem.
        xml_path = osp.join(ann_dir, osp.splitext(jpg_file)[0] + '.xml')
        if osp.exists(xml_path):
            found = delete_label(xml_path, labels)
            # delete_label yields None when a file could not be processed;
            # keep the previous list in that case.
            if found is not None:
                labels = found

    # Count the directory delete_label writes to by default, rather than an
    # unrelated machine-specific path.
    out_dir = '/temp'
    size = len(os.listdir(out_dir)) if osp.isdir(out_dir) else 0
    print('all num: ', size)
def mv_jpg(root='/VOC2012/', dst_dir='/JPEGImages_new'):
    """Copy the images that belong to the annotations in ``<root>/temp``.

    For every file listed in ``<root>/temp`` the image with the same stem
    and a ``.jpg`` extension is looked up in ``<root>/JPEGImages`` and, if
    present, copied into ``dst_dir``.  Despite the function name the images
    are copied, not moved.

    Args:
        root: Dataset root containing ``JPEGImages`` and ``temp``.
        dst_dir: Destination directory for the copies; created if missing.
    """
    jpeg_dir = osp.join(root, 'JPEGImages')
    ann_dir = osp.join(root, 'temp')
    target = Path(dst_dir)
    # shutil.copy does not create missing directories, so make sure the
    # destination exists before the first copy.
    target.mkdir(parents=True, exist_ok=True)
    for xml_file in tqdm(os.listdir(ann_dir)):
        # Swap only the extension; str.replace would also rewrite an 'xml'
        # that happens to appear inside the stem.
        jpg_name = osp.splitext(xml_file)[0] + '.jpg'
        jpg_path = osp.join(jpeg_dir, jpg_name)
        if osp.exists(jpg_path):
            shutil.copy(jpg_path, target / jpg_name)
# Script entry point: run the annotation clean-up only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    run()
2.读入数据
代码如下(示例):
import pandas as pd  # the snippet uses ``pd``, which was never imported

# Load the sample CSV directly from its URL and show the first rows.
data = pd.read_csv(
    'https://labfile.oss.aliyuncs.com/courses/1283/adult.data.csv')
print(data.head())
该处使用的是通过 URL 网络请求获取的数据。
总结
提示:这里对文章进行总结:
例如:以上就是今天要讲的内容,本文仅仅简单介绍了pandas的使用,而pandas提供了大量能使我们快速便捷地处理数据的函数和方法。