# 此篇是为了防止我遗忘，特意发出来。
# txt 转 xml：把 YOLO 格式（归一化坐标）的 txt 标注转换成 VOC 格式的 xml 标注
import time
import os
from PIL import Image
import cv2
import numpy as np
'''Hand-built templates for the pieces of the XML annotation file.'''
# Header of the annotation. Filled with %-formatting from a mapping that
# provides the keys: folder, name, path, width, height. Depth is fixed at 3.
out0 ='''<annotation>
<folder>%(folder)s</folder>
<filename>%(name)s</filename>
<path>%(path)s</path>
<source>
<database>None</database>
</source>
<size>
<width>%(width)d</width>
<height>%(height)d</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
'''
# One <object> entry per bounding box. Filled from a mapping that provides
# the keys: class, xmin, ymin, xmax, ymax (the coordinates go through %d,
# so fractional values are truncated to integers when formatted).
out1 = ''' <object>
<name>%(class)s</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>%(xmin)d</xmin>
<ymin>%(ymin)d</ymin>
<xmax>%(xmax)d</xmax>
<ymax>%(ymax)d</ymax>
</bndbox>
</object>
'''
# Closing tag written after the last <object> entry.
out2 = '''</annotation>
'''
def translate(fdir, lists):
    """Convert YOLO-style ``.txt`` labels into VOC-style ``.xml`` files.

    For every ``.bmp`` path in *lists* the image is read to get its size,
    the label file with the same path but a ``.txt`` extension is loaded,
    and an ``.xml`` file is written next to the image using the module-level
    templates ``out0`` / ``out1`` / ``out2``.

    Each label row is expected to hold ``class cx cy w h`` where the last
    four values are normalised to the image size. The class written to the
    XML is the txt class index plus one (txt 0, 1, 2 -> xml "1", "2", "3").
    Boxes are not clipped to the image bounds.

    Args:
        fdir: Directory holding the images; only its base name is used,
            for the ``<folder>`` element.
        lists: Iterable of image paths. Every path is printed; entries that
            do not end in ``.bmp`` are skipped.

    Raises:
        OSError: If the ``.txt`` label file of an image cannot be opened.
    """
    for jpg in lists:
        print(jpg)
        if jpg[-4:] != '.bmp':
            continue

        # Original author's note: the path must not contain Chinese characters.
        image = cv2.imread(jpg)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            print('skipped (cannot read image): %s' % jpg)
            continue
        h, w = image.shape[:2]

        # Swap only the trailing extension; str.replace would also rewrite
        # a '.bmp' that happens to appear earlier in the path.
        stem = jpg[:-4]

        # ndmin=2 keeps a one-line label file as a single (1, 5) row.
        # Without it np.loadtxt returns a 1-D array and iterating it yields
        # five scalars, which previously wrote the same box five times.
        # Loaded before the XML is created so a missing label file does not
        # leave a truncated XML behind.
        boxes = np.loadtxt(stem + '.txt', ndmin=2)

        source = {
            'name': os.path.basename(jpg),
            'path': jpg,
            'folder': os.path.basename(fdir),
            'width': w,
            'height': h,
        }

        # XML without an encoding declaration is UTF-8 by definition, so
        # write it as UTF-8 regardless of the platform's locale encoding.
        with open(stem + '.xml', 'w', encoding='utf-8') as fxml:
            fxml.write(out0 % source)
            for box in boxes:
                # Normalised centre/size -> pixel corner coordinates.
                xmin = float(box[1] - 0.5 * box[3]) * w
                ymin = float(box[2] - 0.5 * box[4]) * h
                label = {
                    'class': str(int(box[0]) + 1),  # class index starts at 1
                    'xmin': xmin,
                    'ymin': ymin,
                    'xmax': float(xmin + box[3] * w),
                    'ymax': float(ymin + box[4] * h),
                }
                fxml.write(out1 % label)
            fxml.write(out2)
if __name__ == '__main__':
    # Folder that is scanned for images to convert.
    file_dir = "G:/1"
    # Keep every directory entry whose name ends in 'bmp', prefixed with
    # the folder and a forward slash.
    lists = [
        file_dir + '/' + entry
        for entry in os.listdir(file_dir)
        if entry.endswith('bmp')
    ]
    translate(file_dir, lists)
    print('---------------Done!!!--------------')
# xml 转 txt：把 VOC 格式的 xml 标注转换成 YOLO 格式（归一化坐标）的 txt 标注
import os
import xml.etree.ElementTree as ET
# Directory containing the XML files (change this to your own folder).
input_dir = r''
# Directory the txt files are written to (create this folder yourself).
out_dir = r''
# Class names; readxml uses a name's position in this list as the class id
# written to the txt file, and get_class appends names not listed here.
class_list = ["P","W","B","F"]
# Collect the XML files of a directory.
def file_name(input_dir):
    """Return the base names (without extension) of the ``.xml`` files in *input_dir*.

    Only files directly inside *input_dir* are listed. The returned names
    carry no directory part, and the rest of this script rebuilds each path
    as ``input_dir`` + name + ``.xml``, so files found in sub-folders could
    never be opened again.

    Args:
        input_dir: Directory to scan.

    Returns:
        Sorted list of file names with the ``.xml`` extension removed. Empty
        when the directory does not exist or holds no XML files.
    """
    # The first item produced by os.walk describes input_dir itself; the
    # default covers a missing directory, for which os.walk yields nothing.
    _, _, files = next(os.walk(input_dir), (input_dir, [], []))
    stems = []
    # Sorted so the processing order does not depend on the file system.
    for entry in sorted(files):
        stem, ext = os.path.splitext(entry)
        if ext == '.xml':
            stems.append(stem)
    return stems
# Collect every class name used in the annotations.
def get_class(filelist):
    """Append every class name found in the XML files to ``class_list``.

    Each file is read from the module-level ``input_dir``. The ``<name>``
    text of every ``<object>`` element is appended to the module-level
    ``class_list`` unless it is already present.

    Args:
        filelist: Iterable of XML base names (without the ``.xml`` extension).
    """
    for stem in filelist:
        # os.path.join instead of a hard-coded "\\" so the path is also
        # valid on systems whose separator is not a backslash.
        xml_path = os.path.join(input_dir, stem + ".xml")
        # The with-block closes the file even when parsing fails.
        with open(xml_path, encoding='UTF-8') as in_file:
            root = ET.parse(in_file).getroot()
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in class_list:
                class_list.append(cls)
def ConverCoordinate(imgshape, bbox):
    """Turn a pixel-space box into a normalised centre/size box.

    Args:
        imgshape: ``(width, height)`` of the image.
        bbox: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        Tuple ``(x, y, w, h)``: box centre and box size, each scaled by the
        reciprocal of the image width (x, w) or height (y, h).
    """
    left, right, top, bottom = bbox
    # Scale factors: one over the image width / height.
    inv_width = 1. / imgshape[0]
    inv_height = 1. / imgshape[1]
    # Centre of the box, normalised.
    center_x = (left + right) / 2.0 * inv_width
    center_y = (top + bottom) / 2.0 * inv_height
    # Extent of the box, normalised.
    box_w = (right - left) * inv_width
    box_h = (bottom - top) * inv_height
    return center_x, center_y, box_w, box_h
def readxml(i):
    """Convert one XML annotation into a YOLOv5-style txt label file.

    Reads ``<i>.xml`` from the module-level ``input_dir`` and writes
    ``<i>.txt`` into the module-level ``out_dir``. Every ``<object>`` that is
    a direct child of the root becomes one line
    ``class_id x y w h``, where ``class_id`` is the position of the object's
    name in ``class_list`` and the coordinates come from ``ConverCoordinate``.

    Args:
        i: Base name of the annotation file, without extension.

    Raises:
        ValueError: If an object's name is not in ``class_list``.
    """
    # os.path.join instead of a hard-coded "\\" keeps the path portable.
    xml_path = os.path.join(input_dir, i + ".xml")
    with open(xml_path, encoding='UTF-8') as xml_file:
        root = ET.parse(xml_file).getroot()

    # Image size as (width, height), the order ConverCoordinate expects.
    size = root.find('size')
    imgshape = (int(size.find('width').text), int(size.find('height').text))

    rows = []
    for obj in root.findall('object'):
        obj_id = class_list.index(obj.find('name').text)
        # Corner coordinates in the (xmin, xmax, ymin, ymax) order that
        # ConverCoordinate unpacks.
        bbox = obj.find('bndbox')
        bbox_coor = tuple(
            float(bbox.find(tag).text)
            for tag in ('xmin', 'xmax', 'ymin', 'ymax')
        )
        x, y, w, h = ConverCoordinate(imgshape, bbox_coor)
        rows.append('{} {} {} {} {}\n'.format(obj_id, x, y, w, h))

    # Mode 'w', not 'a': running the conversion again must replace the label
    # file rather than append a second copy of every box to it.
    with open(os.path.join(out_dir, i + ".txt"), 'w') as f:
        f.write(''.join(rows))
# Base names of the XML files found in input_dir.
filelist = file_name(input_dir)
# Add the class names found in those files to class_list.
get_class(filelist)
# Show the final class list.
print(class_list)
# Convert every XML file to a txt file.
for i in filelist:
    readxml(i)
# Write the class names, one per line, to classes.txt inside out_dir.
classresult = ''.join(name + "\n" for name in class_list)
# Mode 'w', not 'a': a second run must replace the list instead of appending
# a duplicate copy. os.path.join replaces the hard-coded "\\" separator.
with open(os.path.join(out_dir, "classes.txt"), 'w') as f:
    f.write(classresult)