一、首先,处理数据集
KITTI数据集的标签(label)虽然是txt格式,但其特有的数据格式不符合YOLO训练的要求,所以需要先对其进行处理。原始标签的数据格式如下图所示:可以看到每一行有十多个字段,
而我们只需要表示目标类别的第一列,以及表示目标在图像中矩形框位置的4个坐标,共五个参数。
第1步:首先在txt标签的基础上,对实验所需的类别进行保留/合并操作,我这里只保留了Car、Van和Truck这3个类别。
# modify_annotations_txt.py
import glob
import string
# Collect the path of every .txt label file in the training labels folder.
txt_list = glob.glob('E:/datasets/yolov5-6.0/mydata/labels/train/*.txt') # paths of all .txt files matched in the labels folder
print(txt_list)
def show_category(txt_list):
    """Print the set of category names found in the given label files.

    Every non-blank line of each file is split on whitespace and its first
    field is taken as the category name. Blank lines are skipped so they do
    not show up as an empty-string category.

    Args:
        txt_list: iterable of label-file paths.

    Files that raise IOError while being opened or read are reported with a
    'File error:' message and the remaining files are still processed.
    """
    categories = set()
    for item in txt_list:
        try:
            with open(item) as tdf:
                for each_line in tdf:
                    fields = each_line.split()  # whitespace split, tolerant of tabs / repeated spaces
                    if fields:
                        categories.add(fields[0])  # only the first field, i.e. the category
        except IOError as ioerr:
            print('File error:'+str(ioerr))
    print(categories)  # print the distinct categories once, after all files are read
def merge(line):
    """Join the fields of one label row back into a single text line.

    Args:
        line: sequence of string fields.

    Returns:
        The fields separated by single spaces, with no space after the last
        field, terminated by a newline. An empty sequence yields just the
        newline.
    """
    return ' '.join(line) + '\n'
# Categories whose rows are removed from the label files; every other
# category (Car, Van and Truck in KITTI) is kept unchanged.
# To merge categories instead of dropping them, rewrite labeldata[0] inside
# the loop below (e.g. map 'Truck'/'Van'/'Tram' to 'Car') before appending.
ignored_categories = {
    'DontCare',
    'Misc',
    'Tram',
    'Person_sitting',
    'Pedestrian',
    'Cyclist',
}

print('before modify categories are:\n')
show_category(txt_list)

for item in txt_list:
    new_txt = []
    try:
        with open(item, 'r') as r_tdf:
            for each_line in r_tdf:
                labeldata = each_line.split()
                # Skip blank lines and rows of an ignored category.
                if not labeldata or labeldata[0] in ignored_categories:
                    continue
                new_txt.append(merge(labeldata))
        # Re-open in write mode: truncates the original file, then writes
        # only the rows that were kept.
        with open(item, 'w') as w_tdf:
            w_tdf.writelines(new_txt)
    except IOError as ioerr:
        print('File error:'+str(ioerr))

print('\nafter modify categories are:\n')
show_category(txt_list)
第2步:先将KITTI特有格式的txt标签转换为xml格式。
在这里大家可以先了解一下VOC数据集的目录结构,不了解也不要紧。这里只用到了打勾的三个文件夹:Annotations用来存放稍后生成的xml格式标签;JPEGImages存放所有标签对应的图片;training里面是数据集的原始标签数据(从官网下载后直接解压到这里,与图片一一对应)。
以下代码只处理'Car'、'Truck'、'Van'三类标签,将路径C:/Users/16942/Desktop/v7/kitti/voc/training/label_2里的txt标签转换为xml格式,并保存在
C:/Users/16942/Desktop/v7/kitti/voc/Annotations/路径下。
# kitti_txt_to_xml.py
# encoding:utf-8
# 根据一个给定的XML Schema,使用DOM树的形式从空白文件生成一个XML
from xml.dom.minidom import Document
import cv2
import glob
import os
def _append_text_element(doc, parent, tag, text):
    """Append <tag>text</tag> to parent and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def generate_xml(name, split_lines, img_size, class_ind,
                 output_dir='C:/Users/16942/Desktop/v7/kitti/voc/Annotations/'):
    """Write a VOC-style xml annotation file for one KITTI label file.

    Args:
        name: file name without extension; the output is ``<name>.xml`` and
            the recorded image file name is ``<name>.jpg``.
        split_lines: iterable of label rows (strings). Each row is split on
            whitespace; field 0 is the class name and fields 4-7 are
            xmin, ymin, xmax, ymax.
        img_size: indexable as (height, width, depth), i.e. the order of an
            image array's ``shape``.
        class_ind: container of class names to keep; rows of any other class
            are ignored.
        output_dir: directory that receives the xml file. Defaults to the
            Annotations folder used by this tutorial.

    Raises:
        IndexError: if a kept row has fewer than 8 fields.
        ValueError: if a box coordinate is not numeric.
        OSError: if the output file cannot be written.
    """
    doc = Document()  # create the DOM document
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', 'KITTI')
    # NOTE(review): the driver loads '<name>.png' images while '.jpg' is
    # recorded here -- confirm which extension downstream tools expect.
    _append_text_element(doc, annotation, 'filename', name + '.jpg')

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'The KITTI Database')
    _append_text_element(doc, source, 'annotation', 'KITTI')

    size = doc.createElement('size')
    annotation.appendChild(size)
    _append_text_element(doc, size, 'width', str(img_size[1]))
    _append_text_element(doc, size, 'height', str(img_size[0]))
    _append_text_element(doc, size, 'depth', str(img_size[2]))

    for split_line in split_lines:
        line = split_line.strip().split()
        # Skip blank rows and rows whose class is not requested.
        if not line or line[0] not in class_ind:
            continue
        obj = doc.createElement('object')
        annotation.appendChild(obj)
        _append_text_element(doc, obj, 'name', line[0])
        bndbox = doc.createElement('bndbox')
        obj.appendChild(bndbox)
        # Box coordinates are floats in the label row; truncate to integers.
        for index, tag in enumerate(('xmin', 'ymin', 'xmax', 'ymax'), start=4):
            _append_text_element(doc, bndbox, tag, str(int(float(line[index]))))

    # Write the DOM document to disk; the context manager closes the file
    # even if serialisation or writing fails.
    with open(os.path.join(output_dir, name + '.xml'), 'w') as f:
        f.write(doc.toprettyxml(indent=''))
if __name__ == '__main__':
    # Only rows of these classes are written to the xml files.
    class_ind = ('Car', 'Truck', 'Van')
    # Folder with the KITTI txt labels. To use a 'label_2' folder in the
    # current working directory instead, set
    # labels_dir = os.path.join(os.getcwd(), 'label_2').
    labels_dir = "C:/Users/16942/Desktop/v7/kitti/voc/training/label_2"
    # Folder with the matching images; adjust to your own layout.
    images_dir = 'C:/Users/16942/Desktop/v7/kitti/voc/JPEGImages'

    # os.walk yields the current directory, its sub-directories and its files.
    for parent, dirnames, filenames in os.walk(labels_dir):
        for file_name in filenames:
            name, ext = os.path.splitext(file_name)
            if ext.lower() != '.txt':
                continue  # not a label file
            full_path = os.path.join(parent, file_name)
            with open(full_path) as f:
                split_lines = f.readlines()  # one label row per line
            img_path = os.path.join(images_dir, name + '.png')
            image = cv2.imread(img_path)
            if image is None:
                # cv2.imread signals an unreadable/missing image with None.
                print('Image error: cannot read ' + img_path)
                continue
            generate_xml(name, split_lines, image.shape, class_ind)
    print('txts has converted into xmls')
最终得到的xml形式如下:包含folder(文件夹)、size及object这3类主要信息。
第3步:xml格式转换为yolo所需的txt格式
# xml_to_yolo_txt.py
# 此代码和VOC_KITTI文件夹同目录
import glob
import xml.etree.ElementTree as ET
# Class names as they appear in the xml <name> elements. single_xml_to_txt
# uses each name's position in this list as its numeric class id.
class_names = ['Car', 'Truck', 'Van']
# Directory containing the xml files
path = 'E:/datasets/yolov5-6.0/mydata/labels/test/'
# Convert one xml annotation file into a YOLO txt label file
def single_xml_to_txt(xml_file):
    """Write the YOLO-format txt label file for one xml annotation file.

    The txt file is created next to the xml file, with the same name and a
    ``.txt`` extension. Each ``object`` element becomes one line:
    ``class_id x_center y_center width height``, where the last four values
    are divided by the picture width / height read from the ``size`` element.

    Args:
        xml_file: path of the xml annotation file.

    Raises:
        ValueError: if an object's name is not listed in ``class_names``.
        AttributeError: if ``size``, ``name`` or ``bndbox`` elements are
            missing from the xml.
    """
    import os  # local import: this script's import block does not include os

    root = ET.parse(xml_file).getroot()

    # splitext strips only the final extension, so dots elsewhere in the
    # path (e.g. a '.../yolov5-6.0/...' directory) do not truncate the
    # output path.
    txt_path = os.path.splitext(xml_file)[0] + '.txt'

    # The picture size is the same for every object, so read it once.
    size = root.find('size')
    picture_width = int(size.find('width').text)
    picture_height = int(size.find('height').text)

    with open(txt_path, 'w') as txt_out:
        for member in root.findall('object'):
            print('xmls has converted into txts')
            # Index of the class name inside class_names is the class id.
            class_num = class_names.index(member.find('name').text)
            bndbox = member.find('bndbox')
            box_x_min = int(bndbox.find('xmin').text)  # top-left x
            box_y_min = int(bndbox.find('ymin').text)  # top-left y
            box_x_max = int(bndbox.find('xmax').text)  # bottom-right x
            box_y_max = int(bndbox.find('ymax').text)  # bottom-right y
            # Convert to relative centre position and relative width/height.
            x_center = float(box_x_min + box_x_max) / (2 * picture_width)
            y_center = float(box_y_min + box_y_max) / (2 * picture_height)
            width = float(box_x_max - box_x_min) / picture_width
            height = float(box_y_max - box_y_min) / picture_height
            print(class_num, x_center, y_center, width, height)
            txt_out.write(str(class_num) + ' ' + str(x_center) + ' ' + str(y_center) + ' ' + str(width) + ' ' + str(height) + '\n')
# Convert every xml file in a folder into a txt file
def dir_xml_to_txt(path):
    """Run single_xml_to_txt on every ``*.xml`` file directly inside *path*.

    Args:
        path: directory to scan; works with or without a trailing path
            separator.
    """
    import os  # local import: this script's import block does not include os

    # os.path.join inserts the separator only when it is missing, so a path
    # without a trailing slash no longer turns into the pattern 'dir*.xml'.
    for xml_file in glob.glob(os.path.join(path, '*.xml')):
        print(xml_file)
        single_xml_to_txt(xml_file)
# Convert all xml files found in the directory configured in `path`.
dir_xml_to_txt(path)
第4步:划分数据集——这里按8:1:1划分训练集、验证集和测试集,比例也可以灵活调整。
import os
import random
from shutil import copy2
# Source folder that holds all images
file_path = r"C:/Users/16942/Desktop/v7/data_new/train/image"
# Destination root; the train/val/test sub-folders are created inside it
new_file_path = r"C:\Users\16942\Desktop\v7\dataset_kitti\images"
# Split ratio train : val : test = 8 : 1 : 1
split_rate = [0.8, 0.1, 0.1]
# Names of the sub-folders created under the destination root
split_names = ['train', 'val', 'test']


def split_dataset(src_dir, dst_dir, rates, names=('train', 'val', 'test'), seed=None):
    """Randomly copy the files of src_dir into train/val/test sub-folders.

    Every file is copied exactly once, so the three splits never overlap.

    Args:
        src_dir: folder containing the files to split (sub-folders are
            ignored).
        dst_dir: destination root; it and the split sub-folders are created
            when missing.
        rates: three fractions (train, val, test). The first two determine
            the split boundaries; the test split receives the remainder.
        names: the three sub-folder names, in train/val/test order.
        seed: optional seed for the shuffle, for a reproducible split.

    Returns:
        Tuple ``(train_num, val_num, test_num)`` with the number of files
        copied into each split.
    """
    split_dirs = [os.path.join(dst_dir, split_name) for split_name in names]
    for split_dir in split_dirs:
        print(split_dir)
        os.makedirs(split_dir, exist_ok=True)

    # Sorted listing so that a given seed always produces the same split.
    files = sorted(
        entry for entry in os.listdir(src_dir)
        if os.path.isfile(os.path.join(src_dir, entry))
    )
    print(files)  # e.g. ['00000.jpg', '00001.jpg', '00002.jpg', ...]
    random.Random(seed).shuffle(files)

    total = len(files)
    train_stop = round(total * rates[0])
    val_stop = round(total * (rates[0] + rates[1]))
    buckets = (files[:train_stop], files[train_stop:val_stop], files[val_stop:])

    # Copy each file into the folder of the split it was assigned to.
    for split_dir, bucket in zip(split_dirs, buckets):
        for file_name in bucket:
            copy2(os.path.join(src_dir, file_name), split_dir)
    return tuple(len(bucket) for bucket in buckets)


if __name__ == '__main__':
    train_num, val_num, test_num = split_dataset(
        file_path, new_file_path, split_rate, split_names)
    print("Done!", train_num, val_num, test_num)