问题1:如何用 Python 脚本读取 xml 文件?
xml文件示例:
读取的效果:分别读取类别名称和位置信息并保存在classes1.txt和TrainList.txt文件里,效果图如下
目录结构如下:
总代码如下:
import xml.etree.ElementTree as ET
import os
import numpy as np
xml_file = './test-xml'      # directory holding the annotation XML files
SaveDir1 = 'classes.txt'     # raw class names, one line per annotated object
SaveDir2 = 'TrainList.txt'   # one line per image: file name + box coordinates
SaveDir3 = 'classes1.txt'    # class names with duplicates removed
imgfile = 'img'              # directory holding the image files


def _read_lines(path):
    """Return the lines of a UTF-8 text file without their trailing newlines.

    A missing file is treated as empty, so no output file has to be created
    by hand before the first run.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return [line.rstrip('\n') for line in f]
    except FileNotFoundError:
        return []


def write_annotations(xml_dir, img_dir, class_log, train_list):
    """Append the boxes and class names found in the XML files to text files.

    For every (XML file, image file) pair one line is appended to
    *train_list*: the image file name, a space, then ``xmin,ymin,xmax,ymax,``
    for each ``object`` element of the XML file.  The class name of every
    object is appended to *class_log*, one name per line.

    Both directory listings are sorted before they are zipped because
    ``os.listdir`` returns entries in arbitrary order.  The pairing assumes
    the two directories hold files with matching base names; ``zip`` stops at
    the shorter listing.
    """
    files = sorted(os.listdir(xml_dir))
    files1 = sorted(os.listdir(img_dir))
    # Open each output once instead of re-opening it for every object.
    with open(train_list, 'a') as boxes_out, \
            open(class_log, 'a', encoding='utf-8') as classes_out:
        for file, name in zip(files, files1):
            position = xml_dir + '/' + file
            print(position)
            root = ET.parse(position).getroot()
            for size in root.iter('size'):
                width = int(size.find('width').text)
                height = int(size.find('height').text)
                print('图像宽', width, '图像高', height)
            boxes_out.write(name + " ")
            for obj in root.iter('object'):
                cls = obj.find('name').text
                xmlbox = obj.find('bndbox')
                xmin = int(xmlbox.find('xmin').text)
                ymin = int(xmlbox.find('ymin').text)
                xmax = int(xmlbox.find('xmax').text)
                ymax = int(xmlbox.find('ymax').text)
                num = [xmin, ymin, xmax, ymax]
                # newline=',' keeps the four integers on the current line,
                # each followed by a comma.
                np.savetxt(boxes_out, num, fmt='%d', newline=',')
                print('类型', cls, 'xmin', xmin, 'ymin', ymin,
                      'xmax', xmax, 'ymax', ymax)
                classes_out.write(cls + "\n")
            # Finish this image's line.
            boxes_out.write('\n')


def dedupe_classes(class_log, unique_path):
    """Append to *unique_path* every class name from *class_log* not yet in it.

    Names keep the order of their first appearance.  *unique_path* may be
    missing; it is created on demand.  Each skipped duplicate is reported on
    stdout.
    """
    # A set gives O(1) membership tests instead of re-reading the file for
    # every line of the log.
    seen = set(_read_lines(unique_path))
    with open(unique_path, 'a', encoding='utf-8') as out:
        for w in _read_lines(class_log):
            if w not in seen:
                seen.add(w)
                out.write(w + "\n")
            else:
                print("已去除重复-->" + w)


def main():
    """Run the conversion with the module-level paths."""
    write_annotations(xml_file, imgfile, SaveDir1, SaveDir2)
    dedupe_classes(SaveDir1, SaveDir3)
    print('完成')


if __name__ == '__main__':
    main()
相关问题总结
1.如何同时遍历多个文件名字?
用 zip() 函数把两个文件名列表配对后同时遍历:for file, name in zip(files, files1)。注意 os.listdir() 返回的顺序不固定,两个目录中的文件需要一一对应(可以先用 sorted() 排序),否则标注文件和图片会错位。
# os.listdir returns names in arbitrary order, so both listings are sorted
# before zipping.  This assumes the two directories hold files with matching
# base names, so the n-th XML file belongs to the n-th image.
files = sorted(os.listdir(xml_file))
files1 = sorted(os.listdir(imgfile))
for file, name in zip(files, files1):
    position = xml_file + '/' + file
    print(position)
    tree = ET.parse(position)
    root = tree.getroot()
    # Report the image size recorded in the annotation.
    for size in root.iter('size'):
        width = int(size.find('width').text)
        height = int(size.find('height').text)
        print('图像宽', width, '图像高', height)
    # Start this image's line in the box list with the image file name.
    with open(SaveDir2, 'a+') as f:
        f.write(name + " ")
2.如何对txt文件里重复的字符串去重?
# End the current image's line in the box list.
with open(SaveDir2, 'a+') as f:
    f.write('\n')

# Names already stored in the de-duplicated file.  The file may not exist on
# the first run, which simply means nothing has been stored yet.
try:
    with open(SaveDir3, 'r', encoding='utf-8') as f:
        seen = {line.rstrip('\n') for line in f}
except FileNotFoundError:
    seen = set()

# Copy each name from the raw log the first time it is met; the set avoids
# re-reading the output file for every line.
with open(SaveDir1, 'r', encoding='utf-8') as f_read, \
        open(SaveDir3, 'a', encoding='utf-8') as f_write:
    for line in f_read:
        w = line.rstrip('\n')
        if w not in seen:
            seen.add(w)
            f_write.write(w + "\n")
        else:
            print("已去除重复-->" + w)
print('完成')
3.如何追加保存int类型数据:
把 int 类型的数据放进列表(数组),用 np.savetxt() 保存,而不是用 f.write() 保存(f.write() 只能写入字符串)。
# Corner coordinates of the bounding box, read from the <bndbox> element.
xmin = int(xmlbox.find('xmin').text)
ymin = int(xmlbox.find('ymin').text)
xmax = int(xmlbox.find('xmax').text)
ymax = int(xmlbox.find('ymax').text)
num = [xmin, ymin, xmax, ymax]
with open(SaveDir2, 'a+') as f:
    # newline=',' appends the four integers to the current line, each
    # followed by a comma, instead of one value per line.
    np.savetxt(f, num, fmt='%d', newline=',')
print('类型', cls, 'xmin', xmin, 'ymin', ymin, 'xmax', xmax, 'ymax', ymax)
# The with-block flushes and closes the file, so no explicit flush()/close()
# is needed.
with open(SaveDir1, "a", encoding="utf-8") as f:
    f.write(cls + "\n")  # one class name per line