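"""
Transform KITTI tracking labels into the normalized label format expected by JDE.

- Tracking has one more level of sub-directories than Detection, which makes it slightly more complicated.
- Both Van and Car are treated as Vehicles here.
- A .train file is also generated so that it can be used directly for training.
"""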
from glob import glob
import numpy as np
import os
class ConvertTxt(object):
    """Convert KITTI tracking label files into per-frame JDE label files.

    Each input file ``<label>/<sequence>.txt`` holds the annotations of one
    sequence.  For every frame that contains at least one vehicle (``Car`` or
    ``Van``) a file ``<label_ids>/<sequence>/<frame:06d>.txt`` is written
    whose lines are ``0 <track_id> <x_center> <y_center> <width> <height>``,
    with the box normalized by the image size.
    """

    # KITTI object types that are merged into the single vehicle class (0).
    VEHICLE_TYPES = ("Car", "Van")

    # Image size used to normalize boxes: KITTI tracking 2D images are
    # 375 x 1242 x 3 (height, width, channels).
    # NOTE(review): some KITTI sequences may use a slightly different
    # resolution - confirm and pass ``image_size`` if that matters.
    image_w = 1242
    image_h = 375

    def __init__(self,
                 label=r"E:\8_DataSet\KITTI_tracking\label_02",
                 label_ids=r"E:\8_DataSet\KITTI_tracking\labels_with_ids",
                 image_size=None):
        """Set up the converter.

        Args:
            label: directory holding the original KITTI label files.
            label_ids: root directory that receives the converted labels.
            image_size: optional ``(width, height)`` used for normalization;
                the class defaults (1242, 375) are used when omitted.
        """
        self.label = label  # original label files
        self.label_ids = label_ids  # target label directory
        self.folder = "0000"
        self.file = "0000.txt"
        self.label_subdir = None
        if image_size is not None:
            self.image_w, self.image_h = image_size

    @staticmethod
    def generate_kitti_train(image_root=r"E:\8_DataSet\KITTI_tracking\image_02",
                             out_file="kitti-img.train",
                             prefix="KITTI/image_02"):
        """Append one ``<prefix>/<sequence>/<image>`` line per image to *out_file*.

        The list is built from the image folders, so it contains every image.
        The filtered ``labels_with_ids`` folders usually hold fewer frames; to
        list only labelled frames, walk ``labels_with_ids`` instead and
        replace the ``.txt`` suffix by ``.png``.

        NOTE: the file is opened in append mode (as before), so running this
        twice duplicates the entries.

        Args:
            image_root: directory whose sub-directories are the sequences.
            out_file: path of the train list to append to.
            prefix: path prefix written in front of every entry.
        """
        # Only the first level holds sequences; a missing root yields nothing.
        _, directories, _ = next(os.walk(image_root), (image_root, [], []))
        if not directories:
            return
        with open(out_file, 'a') as f:
            # Sorted so the output does not depend on directory listing order.
            for sequence in sorted(directories):
                sequence_dir = os.path.join(image_root, sequence)
                for item in sorted(os.listdir(sequence_dir)):
                    f.write("{}/{}/{}\n".format(prefix, sequence, item))

    # KITTI label fields used below:
    #   type - describes the type of object: 'Car', 'Van', 'Truck',
    #          'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 'Misc',
    #          'DontCare'
    #   bbox - 2D bounding box of the object in the image (0-based index):
    #          contains `left, top, right, bottom` pixel coordinates

    def convert_label_with_id(self):
        """Parse ``self.file`` and build the converted lines of its vehicles.

        Also sets ``self.label_subdir`` to ``<label_ids>/<folder>`` and
        creates that directory (including missing parents).

        Returns:
            ``(frame_array, save_lines)``: two parallel lists holding, for
            every kept annotation, its frame number (as read, a string) and
            its converted label line.
        """
        label_txt = os.path.join(self.label, self.file)
        self.label_subdir = os.path.join(self.label_ids, self.folder)
        frame_array = []
        save_lines = []
        os.makedirs(self.label_subdir, exist_ok=True)
        with open(label_txt) as f:
            for line in f:
                # split() tolerates tabs, repeated spaces and line endings.
                fields = line.split()
                if not fields:
                    continue  # blank line
                _frame, _id, _type = fields[0], fields[1], fields[2]
                if _type not in self.VEHICLE_TYPES:
                    continue
                # TODO: decide whether the id should be written as -1.
                xc, yc, w, h = self.lrtb2cxcywh(
                    fields[6], fields[7], fields[8], fields[9])
                frame_array.append(_frame)
                # Normalized coordinates, YOLO layout; class id is always 0.
                save_lines.append(
                    "{} {} {} {} {} {}".format(0, _id, xc, yc, w, h))
        return frame_array, save_lines

    def write_one_file(self, frame_array, save_lines):
        """Write the converted lines into one file per frame.

        Lines are grouped by frame number, so a frame that shows up in
        several separate runs still ends up complete in a single file.

        Args:
            frame_array: frame number of every entry in *save_lines*.
            save_lines: converted label lines, parallel to *frame_array*.
        """
        grouped = {}
        for frame, line in zip(frame_array, save_lines):
            grouped.setdefault(int(frame), []).append(str(line))
        for frame, frame_lines in grouped.items():
            # Six-digit, zero-padded frame number as the file name.
            txt_path = os.path.join(self.label_subdir,
                                    "{:06d}.txt".format(frame))
            with open(txt_path, 'w') as f:
                f.write("\n".join(frame_lines) + "\n")

    def write_files(self):
        """Convert every ``.txt`` label file directly inside ``self.label``."""
        # Only the top level is read: names are joined with self.label later.
        _, _, files = next(os.walk(self.label), (self.label, [], []))
        for name in sorted(files):
            if not name.lower().endswith(".txt"):
                continue
            self.file = name
            self.folder = os.path.splitext(name)[0]
            frame_array, save_lines = self.convert_label_with_id()
            self.write_one_file(frame_array, save_lines)

    def clasify_frames(self, vec):
        """Run-length encode *vec*.

        Args:
            vec: a list or array in which equal values are adjacent.

        Returns:
            ``(counts, values)``: two parallel lists with the length of each
            run of equal consecutive values and the value of that run.
        """
        frame_list = []
        name_list = []
        for item in vec:
            if name_list and name_list[-1] == item:
                frame_list[-1] += 1
            else:
                name_list.append(item)
                frame_list.append(1)
        return frame_list, name_list

    @staticmethod
    def transfer_int2txt(frme):
        """Return the thousands, hundreds, tens and units digits of *frme*.

        Args:
            frme: non-negative frame number (int or numeric string).
        """
        frme = int(frme)
        return (frme // 1000) % 10, (frme // 100) % 10, (frme // 10) % 10, frme % 10

    def lrtb2cxcywh(self, l, t, r, b, image_w=None, image_h=None):
        """Convert a ``left, top, right, bottom`` pixel box to normalized YOLO.

        Args:
            l, t, r, b: box edges in pixels (numbers or numeric strings).
            image_w, image_h: image size; default to the instance settings.

        Returns:
            ``(x_center, y_center, width, height)``, each divided by the
            corresponding image dimension.
        """
        l, t, r, b = float(l), float(t), float(r), float(b)
        if image_w is None:
            image_w = self.image_w
        if image_h is None:
            image_h = self.image_h
        xc, yc, w, h = self.pascal_voc_to_yolo(l, t, r, b, image_w, image_h)
        return xc, yc, w, h

    # Convert a Pascal VOC box to YOLO:
    # [x_min, y_min, x_max, y_max] ---> [x_center, y_center, width, height]
    def pascal_voc_to_yolo(self, x1, y1, x2, y2, image_w, image_h):
        """Return ``[x_center, y_center, width, height]`` normalized by image size."""
        return [((x2 + x1) / (2 * image_w)), ((y2 + y1) / (2 * image_h)), (x2 - x1) / image_w, (y2 - y1) / image_h]
# Script entry: build a converter with the paths configured in
# ConvertTxt.__init__ and convert the label files found in its source label
# directory. These statements run whenever the module is executed or imported.
covert_test = ConvertTxt()
covert_test.write_files()