一,新建VOC2007文件夹
新建一个文件夹,用来存放整个数据集;文件夹名字可以自定义,也可以和VOC2007数据集一样命名为:VOC2007
然后像VOC2007一样,在该文件夹里面新建如下文件夹:Annotations(存放xml标注)、ImageSets/Main(存放数据集划分的txt)、JPEGImages(存放图片)
二、将训练图片放到JPEGImages
将所有的训练图片放到该文件夹里,然后将图片重命名为VOC2007的“000005.jpg”形式
重命名文件:
import os


def rename_images(path, start=0, width=6):
    """Rename every file directly inside *path* to a zero-padded number.

    Files are processed in sorted name order and become ``000000.jpg``,
    ``000001.jpg``, ... with each file keeping its own extension.
    Sub-directories are skipped.

    The rename is done in two passes (first to temporary names, then to the
    final names). Renaming directly could hit a target name that still
    belongs to a file that has not been processed yet, which overwrites that
    file on POSIX and raises ``FileExistsError`` on Windows.

    Args:
        path: Directory whose files are renamed in place.
        start: Number given to the first file.
        width: Minimum number of digits; shorter numbers are left-padded
            with zeros via ``str.zfill``.

    Returns:
        The new full paths, in the order the files were numbered.
    """
    # Sort so the numbering does not depend on the arbitrary order in which
    # os.listdir() returns entries.
    names = sorted(
        name for name in os.listdir(path)
        if not os.path.isdir(os.path.join(path, name))
    )

    # Pass 1: move everything out of the way under temporary names.
    staged = []
    for position, name in enumerate(names):
        extension = os.path.splitext(name)[1]
        temp_path = os.path.join(
            path, ".rename_tmp_%d_%d%s" % (os.getpid(), position, extension))
        os.rename(os.path.join(path, name), temp_path)
        staged.append((temp_path, extension))

    # Pass 2: assign the final sequential names.
    renamed = []
    for position, (temp_path, extension) in enumerate(staged):
        new_path = os.path.join(
            path, str(start + position).zfill(width) + extension)
        os.rename(temp_path, new_path)
        renamed.append(new_path)
    return renamed


if __name__ == "__main__":
    path = "E:\\image"
    # Show what is in the folder (files and sub-folders) before renaming.
    for file in os.listdir(path):
        print(file)
    rename_images(path)
主要代码:
str(VOC2007_JPEG_path_len).zfill(6)
可根据自己的需要参照以上代码做修改
三,标签+坐标
样式:000003.jpg LabelC 220 168 120 150
需要注意的是坐标:
220 左上顶点X坐标
168 左上顶点Y坐标
120 标注框的宽度(w)
150 标注框的高度(h)
四,生成xml代码
txt内每行的格式:文件名 标签 x_min y_min w h
注:x_max = x_min + w
y_max = y_min + h
# 标注数据 格式(filename label x_min y_min w h)
"""
import cv2
import os
import xml.etree.ElementTree as ET
"""
pretty_xml函数:对写入的xml文件进行美化修饰
element为传进来的Element类,参数indent用于缩进,newline用于换行
"""
def Coordinate2XML(imagepath, outputxmlpath, Coordinatelist, folder,
                   template_path='./anno.xml'):
    """Write a Pascal VOC style annotation XML for one image.

    The XML is built from a template file. The template must contain
    ``filename``, ``folder``, ``path``, ``size`` (with ``height``, ``width``,
    ``depth``) and one ``object`` (with ``name`` and
    ``bndbox/xmin|ymin|xmax|ymax``) directly under the root. The first record
    fills that existing ``object``; every further record appends a new one.

    Requires ``cv2``, ``os`` and ``xml.etree.ElementTree as ET`` to be
    imported at module level.

    Args:
        imagepath: Path of the image; read with ``cv2.imread`` to obtain the
            height, width and channel count.
        outputxmlpath: Directory the XML file is written into.
        Coordinatelist: Strings of the form ``"<file> <label> <x> <y> <w> <h>"``
            where (x, y) is the top-left corner and w/h are the box size.
            Blank entries are ignored.
        folder: Text stored in the ``folder`` element.
        template_path: Path of the template XML.

    Raises:
        OSError: If the image cannot be read.
    """

    def pretty_xml(element, indent, newline, level=0):
        """Indent *element* and its descendants in place via text/tail."""
        children = list(element)
        if children:  # only elements with children get their text re-flowed
            if element.text is None or element.text.isspace():
                element.text = newline + indent * (level + 1)
            else:
                element.text = (newline + indent * (level + 1)
                                + element.text.strip()
                                + newline + indent * (level + 1))
        last = len(children) - 1
        for position, child in enumerate(children):
            # A sibling follows: keep the same depth. Last child: the parent's
            # closing tag follows, so step back one level.
            child.tail = newline + indent * (level + 1 if position < last else level)
            pretty_xml(child, indent, newline, level=level + 1)

    def parse_record(record):
        """Return (label, (xmin, ymin, xmax, ymax)) for one record string."""
        fields = record.split()
        label = fields[1]
        # The box is grown by one pixel on every side:
        # xmin = x - 1, xmax = x + w + 1 (same for y).
        xmin = int(float(fields[2])) - 1
        ymin = int(float(fields[3])) - 1
        xmax = int(float(fields[4])) + 1 + xmin + 1
        ymax = int(float(fields[5])) + 1 + ymin + 1
        return label, (xmin, ymin, xmax, ymax)

    box_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    img_file_name = os.path.split(imagepath)[1]
    tree = ET.parse(template_path)
    root = tree.getroot()

    # Image-level information.
    root.find('filename').text = img_file_name
    root.find('folder').text = folder
    root.find('path').text = imagepath.replace('\\', '/')

    img = cv2.imread(imagepath)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError("cannot read image: %s" % imagepath)
    sz = root.find('size')
    sz.find('height').text = str(img.shape[0])
    sz.find('width').text = str(img.shape[1])
    sz.find('depth').text = str(img.shape[2])

    records = [record for record in Coordinatelist if record.strip()]
    template_obj = root.find('object')
    if not records and template_obj is not None:
        # No boxes: drop the placeholder instead of writing its template values.
        root.remove(template_obj)

    for i, record in enumerate(records):
        lable, box = parse_record(record)
        if i == 0:
            # The first record reuses the object already present in the template.
            template_obj.find('name').text = lable
            bb = template_obj.find('bndbox')
            for tag, value in zip(box_tags, box):
                bb.find(tag).text = str(value)
        else:
            # Further records need a freshly built object element.
            obj = ET.SubElement(root, 'object')
            ET.SubElement(obj, 'name').text = lable
            ET.SubElement(obj, 'pose').text = "Unspecified"
            ET.SubElement(obj, 'truncated').text = "0"
            ET.SubElement(obj, 'difficult').text = "0"
            bndbox = ET.SubElement(obj, 'bndbox')
            for tag, value in zip(box_tags, box):
                ET.SubElement(bndbox, tag).text = str(value)

    # Swap only the extension, whatever it is (.jpg, .JPG, .png, ...).
    xml_file = os.path.splitext(img_file_name)[0] + '.xml'
    pretty_xml(root, '\t', '\n')
    tree.write(os.path.join(outputxmlpath, xml_file), encoding='utf-8')
imagepath 图片路径
outputxmlpath 输出xml路径
Coordinatelist 坐标列表
Coordinatelist例如:
['000003.jpg LabelA 277 235 240 160', '000003.jpg LabelB 220 168 120 150', '000003.jpg LabelC 220 168 120 150']
folder 图片文件夹名
生成xml:
XML:
<annotation>
<folder>JPEGImages</folder>
<filename>000001.jpg</filename>
<path>E:/beto/DrawingIdentification/fasterrcnn/VOC2007/JPEGImages/000001.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>610</width>
<height>610</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>LabelA</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>276</xmin>
<ymin>234</ymin>
<xmax>518</xmax>
<ymax>396</ymax>
</bndbox>
</object>
<object>
<name>LabelB</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>219</xmin>
<ymin>167</ymin>
<xmax>341</xmax>
<ymax>319</ymax>
</bndbox>
</object>
<object>
<name>LabelC</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>219</xmin>
<ymin>167</ymin>
<xmax>341</xmax>
<ymax>319</ymax>
</bndbox>
</object>
</annotation>
五,xml制作VOC数据集
import os
import random
import xml.etree.ElementTree as ET
import numpy as np
from fasterrcnn.utils.utils import get_classes
def voc_annotation(annotation_mode=0, classes_path='model_data/voc_classes.txt',
                   trainval_percent=0.9, train_percent=0.9,
                   VOCdevkit_path='../fasterrcnn'):
    """Split a VOC2007-style dataset and write the training list files.

    Requires ``os``, ``random``, ``xml.etree.ElementTree as ET``,
    ``numpy as np`` and ``get_classes`` to be available at module level.

    Args:
        annotation_mode: What to generate.
            0 - everything: the txt files in ``VOC2007/ImageSets/Main`` and
                ``2007_train.txt`` / ``2007_val.txt``.
            1 - only the txt files in ``VOC2007/ImageSets/Main``.
            2 - only ``2007_train.txt`` / ``2007_val.txt``.
        classes_path: Class list file passed to ``get_classes``. It must
            match the one used for training and prediction; objects whose
            name is not in it are skipped. Used in modes 0 and 2.
        trainval_percent: Share of all samples that go into (train + val);
            the rest is the test split. Used in modes 0 and 1.
        train_percent: Share of (train + val) that goes into train.
            Used in modes 0 and 1.
        VOCdevkit_path: Directory that contains the ``VOC2007`` folder.

    Raises:
        ValueError: If the absolute dataset path contains a space.
    """
    VOCdevkit_sets = [('2007', 'train'), ('2007', 'val')]
    classes, _ = get_classes(classes_path)

    # Counters: images per set and objects per class.
    photo_nums = np.zeros(len(VOCdevkit_sets))
    nums = np.zeros(len(classes))

    def convert_annotation(year, image_id, list_file):
        """Append ' xmin,ymin,xmax,ymax,cls_id' to list_file for each kept object."""
        xml_path = os.path.join(
            VOCdevkit_path, 'VOC%s/Annotations/%s.xml' % (year, image_id))
        with open(xml_path, encoding='utf-8') as in_file:
            root = ET.parse(in_file).getroot()

        for obj in root.iter('object'):
            difficult_node = obj.find('difficult')
            difficult = 0 if difficult_node is None else difficult_node.text
            cls = obj.find('name').text
            # Skip unknown classes and objects flagged as difficult.
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = tuple(int(float(xmlbox.find(tag).text))
                      for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
            list_file.write(" " + ",".join(str(a) for a in b) + ',' + str(cls_id))
            nums[cls_id] += 1

    random.seed(0)
    if " " in os.path.abspath(VOCdevkit_path):
        raise ValueError("数据集存放的文件夹路径与图片名称中不可以存在空格,否则会影响正常的模型训练,请注意修改。")

    if annotation_mode == 0 or annotation_mode == 1:
        print("Generate txt in ImageSets.")
        xmlfilepath = os.path.join(VOCdevkit_path, 'VOC2007/Annotations')
        saveBasePath = os.path.join(VOCdevkit_path, 'VOC2007/ImageSets/Main')
        # Sorted so that, together with the fixed seed, the split does not
        # depend on the order os.listdir() happens to return.
        total_xml = sorted(
            name for name in os.listdir(xmlfilepath) if name.endswith(".xml"))

        num = len(total_xml)
        indices = range(num)
        tv = int(num * trainval_percent)
        tr = int(tv * train_percent)
        trainval = random.sample(indices, tv)
        train = random.sample(trainval, tr)
        # Sets give O(1) membership tests in the loop below.
        trainval_ids = set(trainval)
        train_ids = set(train)

        print("train and val size", tv)
        print("train size", tr)
        os.makedirs(saveBasePath, exist_ok=True)
        # Written as UTF-8 because the files are read back as UTF-8 below.
        with open(os.path.join(saveBasePath, 'trainval.txt'), 'w', encoding='utf-8') as ftrainval, \
                open(os.path.join(saveBasePath, 'test.txt'), 'w', encoding='utf-8') as ftest, \
                open(os.path.join(saveBasePath, 'train.txt'), 'w', encoding='utf-8') as ftrain, \
                open(os.path.join(saveBasePath, 'val.txt'), 'w', encoding='utf-8') as fval:
            for i in indices:
                name = total_xml[i][:-4] + '\n'  # strip the ".xml" suffix
                if i in trainval_ids:
                    ftrainval.write(name)
                    if i in train_ids:
                        ftrain.write(name)
                    else:
                        fval.write(name)
                else:
                    ftest.write(name)
        print("Generate txt in ImageSets done.")

    if annotation_mode == 0 or annotation_mode == 2:
        print("Generate 2007_train.txt and 2007_val.txt for train.")
        for type_index, (year, image_set) in enumerate(VOCdevkit_sets):
            ids_path = os.path.join(
                VOCdevkit_path, 'VOC%s/ImageSets/Main/%s.txt' % (year, image_set))
            with open(ids_path, encoding='utf-8') as ids_file:
                image_ids = ids_file.read().strip().split()
            # One line per image: "<abs image path> box1 box2 ...".
            with open('%s_%s.txt' % (year, image_set), 'w', encoding='utf-8') as list_file:
                for image_id in image_ids:
                    list_file.write('%s/VOC%s/JPEGImages/%s.jpg'
                                    % (os.path.abspath(VOCdevkit_path), year, image_id))
                    convert_annotation(year, image_id, list_file)
                    list_file.write('\n')
            photo_nums[type_index] = len(image_ids)
        print("Generate 2007_train.txt and 2007_val.txt for train done.")

        def printTable(List1, List2):
            """Print the columns in List1 row by row, right-aligned to the widths in List2."""
            for i in range(len(List1[0])):
                print("|", end=' ')
                for column, col_width in zip(List1, List2):
                    print(column[i].rjust(int(col_width)), end=' ')
                    print("|", end=' ')
                print()

        str_nums = [str(int(x)) for x in nums]
        tableData = [
            classes, str_nums
        ]
        # Each column is as wide as its longest cell.
        colWidths = [max((len(cell) for cell in column), default=0)
                     for column in tableData]
        printTable(tableData, colWidths)

        if photo_nums[0] <= 500:
            print("训练集数量小于500,属于较小的数据量,请注意设置较大的训练世代(Epoch)以满足足够的梯度下降次数(Step)。")

        if np.sum(nums) == 0:
            # Deliberately printed three times for emphasis.
            for _ in range(3):
                print("在数据集中并未获得任何目标,请注意修改classes_path对应自己的数据集,并且保证标签名字正确,否则训练将会没有任何效果!")
            print("(重要的事情说三遍)。")
# if __name__ == "__main__":
# voc_annotation()
生成效果:
后面就可以开始训练啦