工作记录–在VecanLab的第470天
本周完成了TensorFlow Object Detection API所需输入数据的准备,并跑通了官方提供的训练代码。目前使用SSD架构训练的红绿灯检测模型准确率和召回率都较低,正在尝试改用Faster R-CNN架构训练,同时增加了所使用的LISA训练数据;但训练时准确率和召回率均为零,原因未知,待查。下周准备把Faster R-CNN用起来做红绿灯检测,并继续阅读目标跟踪相关的论文和代码,希望能把目标检测和目标跟踪算法结合起来。
从CSV格式的标注文件生成TFRecord标注文件
这是一个拖了很久才解决的问题,我终于把数据接口打通了。
按照官方给出的如下说明,先跑了一遍为Oxford-IIIT Pet数据集生成TFRecord的代码。
wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
tar -xvf annotations.tar.gz
tar -xvf images.tar.gz
python object_detection/dataset_tools/create_pet_tf_record.py \
--label_map_path=object_detection/data/pet_label_map.pbtxt \
--data_dir=`pwd` \
--output_dir=`pwd`
确认代码可以用之后,在create_kitti_tf_record.py的基础上进行修改,具体代码如下
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 10 16:19:07 2019
@author: lxy
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import os
import csv
import numpy as np
import PIL.Image as pil
import tensorflow as tf
import re
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
from object_detection.utils.np_box_ops import iou
# Command-line flags. The defaults point at the author's local copy of the
# LISA dayTrain data.
tf.app.flags.DEFINE_string(
    'data_dir',
    '/media/swj/Darknet/dark_mission/trafficLight/LISA/dayTrain/',
    'Location of root directory for the data. Folder structure is assumed '
    'to be: <data_dir>/<clip>/frameAnnotationsBULB.csv (annotations) and '
    '<data_dir>/<clip>/frames (images).')
# NOTE(review): the help texts for output_path and classes_to_use were carried
# over from create_kitti_tf_record.py; confirm that the converter actually
# honours these flags the way the texts describe.
tf.app.flags.DEFINE_string(
    'output_path',
    '/media/swj/Darknet/dark_mission/trafficLight/LISA/dayTrain/',
    'Path to which TFRecord files will be written. The TFRecord with the '
    'training set will be located at: <output_path>_train.tfrecord. And the '
    'TFRecord with the validation set will be located at: '
    '<output_path>_val.tfrecord')
tf.app.flags.DEFINE_string(
    'classes_to_use',
    'car,pedestrian,dontcare',
    'Comma separated list of class names that will be used. Adding the '
    'dontcare class will remove all bboxs in the dontcare regions.')
tf.app.flags.DEFINE_string(
    'label_map_path',
    'data/light_label_map.pbtxt',
    'Path to label map proto.')
# The default is a real integer rather than the string '500'.
tf.app.flags.DEFINE_integer(
    'validation_set_size',
    500,
    'Number of images to be used as a validation set.')
FLAGS = tf.app.flags.FLAGS
# Split the per-clip LISA annotation CSV into one CSV file per image.
def convert_LISA_annotation_to_separated_annotations(
        root_dir='/media/swj/Darknet/dark_mission/trafficLight/LISA/dayTrain/'):
    """Write one comma-separated annotation file per annotated image.

    For every sub-directory ``<clip>`` of ``root_dir`` the semicolon-separated
    file ``<clip>/frameAnnotationsBULB.csv`` is read. Rows whose first field
    contains a ``/`` are grouped by the image file name that follows the
    slash, and each group is written to ``<clip>/<image stem>.csv``. Rows
    without a ``/`` in the first field (for example a header row) are skipped.

    Each output file is opened once and rewritten from scratch, so running the
    function repeatedly does not duplicate annotations.

    Args:
        root_dir: Directory holding one sub-directory per clip. Defaults to
            the location that used to be hard-coded in this function.

    Raises:
        OSError: If a clip directory has no ``frameAnnotationsBULB.csv``.
    """
    for subdir in sorted(os.listdir(root_dir)):
        clip_dir = os.path.join(root_dir, subdir)
        # Plain files in the root (e.g. previously generated record files)
        # are not clips.
        if not os.path.isdir(clip_dir):
            continue

        rows_by_image = {}
        with open(os.path.join(clip_dir, 'frameAnnotationsBULB.csv')) as f:
            for line in f:
                a_line = line.rstrip('\n').split(';')
                if '/' not in a_line[0]:
                    continue
                pic_name = a_line[0].rsplit('/', 1)[-1]
                rows_by_image.setdefault(pic_name, []).append(a_line)

        for pic_name, rows in rows_by_image.items():
            out_name = os.path.splitext(pic_name)[0] + '.csv'
            with open(os.path.join(clip_dir, out_name), 'w', newline='') as out:
                csv.writer(out, dialect='excel').writerows(rows)
# Convert the per-image CSV annotation files into TFRecord files.
def convert_LISA_to_tfrecords(data_dir, output_path, classes_to_use,
                              label_map_path, validation_set_size):
    """Write one training and one validation TFRecord file per clip.

    Every sub-directory ``<clip>`` of ``data_dir`` is expected to contain a
    ``frames`` directory with the images and, next to it, one
    ``<image stem>.csv`` annotation file per annotated image. Images without
    an annotation file are skipped. Image names must look like
    ``<prefix>--<number>.<ext>``; an image goes to the validation file when
    its number is below ``validation_set_size`` and to the training file
    otherwise.

    Args:
        data_dir: Root directory with one sub-directory per clip.
        output_path: Directory that receives the files
            ``traffic_light_train.record-<clip>-of-00013`` and
            ``traffic_light_val.record-<clip>-of-00013``.
        classes_to_use: Accepted for interface compatibility; not used.
        label_map_path: Path of the label map handed to
            ``label_map_util.get_label_map_dict``.
        validation_set_size: Frame-number threshold for the validation split.
    """
    label_map_dict = label_map_util.get_label_map_dict(label_map_path)

    for sub_dir in sorted(os.listdir(data_dir)):
        clip_dir = os.path.join(data_dir, sub_dir)
        # Plain files in the root (e.g. record files from an earlier run) are
        # not clips.
        if not os.path.isdir(clip_dir):
            continue

        image_dir = os.path.join(clip_dir, 'frames')
        train_writer = tf.python_io.TFRecordWriter(os.path.join(
            output_path, 'traffic_light_train.record-' + sub_dir + '-of-00013'))
        val_writer = tf.python_io.TFRecordWriter(os.path.join(
            output_path, 'traffic_light_val.record-' + sub_dir + '-of-00013'))
        # Close both writers for this clip even if an image fails to convert.
        try:
            for img_name in sorted(tf.gfile.ListDirectory(image_dir)):
                stem = os.path.splitext(img_name)[0]
                annotation_path = os.path.join(clip_dir, stem + '.csv')
                if not os.path.exists(annotation_path):
                    continue

                # Parse the frame number only for annotated images, so that
                # stray files in the frames directory are ignored.
                img_num = int(stem.split('--')[1])
                is_validation_img = img_num < validation_set_size

                # Hand the annotation dict from read_annotation_file() to
                # prepare_example() to build the tf.train.Example.
                annotation_for_image = read_annotation_file(annotation_path)
                image_path = os.path.join(image_dir, img_name)
                example = prepare_example(
                    image_path, annotation_for_image, label_map_dict)

                # SerializeToString() turns the Example into its binary form.
                serialized = example.SerializeToString()
                if is_validation_img:
                    val_writer.write(serialized)
                else:
                    train_writer.write(serialized)
        finally:
            train_writer.close()
            val_writer.close()
# Turn one per-image annotation file into a dict of arrays.
def read_annotation_file(filename):
    """Read a comma-separated annotation file into a dict of numpy arrays.

    Each non-blank row describes one object: column 1 holds the class name
    and columns 2-5 hold the left, top, right and bottom box coordinates.
    Column 0 is ignored. The file is parsed with ``csv.reader`` so that
    quoting produced by ``csv.writer`` is handled, and blank rows are skipped.

    Args:
        filename: Path of the annotation file.

    Returns:
        A dict with the keys ``'type'`` (string array) and ``'2d_bbox_left'``,
        ``'2d_bbox_top'``, ``'2d_bbox_right'``, ``'2d_bbox_bottom'`` (float
        arrays), each with one entry per row.
    """
    with open(filename, newline='') as f:
        rows = [row for row in csv.reader(f)
                if any(field.strip() for field in row)]

    anno = {'type': np.array([row[1] for row in rows], dtype=str)}
    bbox_columns = (
        ('2d_bbox_left', 2),
        ('2d_bbox_top', 3),
        ('2d_bbox_right', 4),
        ('2d_bbox_bottom', 5),
    )
    for key, column in bbox_columns:
        anno[key] = np.array([float(row[column]) for row in rows], dtype=float)
    return anno
# Build a tf.train.Example from one image and its annotations.
def prepare_example(image_path, annotations, label_map_dict):
    """Pack an encoded image and its boxes into a ``tf.train.Example``.

    The image file is read as raw bytes and decoded once to obtain its height
    and width. The box coordinates are divided by those dimensions before
    being stored.

    Args:
        image_path: Path of the image file; also stored as filename and
            source id.
        annotations: Dict with the keys ``'type'``, ``'2d_bbox_left'``,
            ``'2d_bbox_top'``, ``'2d_bbox_right'`` and ``'2d_bbox_bottom'``.
        label_map_dict: Mapping from class name to integer label.

    Returns:
        The assembled ``tf.train.Example``.
    """
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        encoded_png = image_file.read()

    pixels = np.asarray(pil.open(io.BytesIO(encoded_png)))
    height, width = int(pixels.shape[0]), int(pixels.shape[1])
    key = hashlib.sha256(encoded_png).hexdigest()

    # Divide the box edges by the image dimensions.
    x_scale = float(width)
    y_scale = float(height)
    xmin_norm = annotations['2d_bbox_left'] / x_scale
    ymin_norm = annotations['2d_bbox_top'] / y_scale
    xmax_norm = annotations['2d_bbox_right'] / x_scale
    ymax_norm = annotations['2d_bbox_bottom'] / y_scale

    path_bytes = image_path.encode('utf8')
    class_names = [name.encode('utf8') for name in annotations['type']]
    class_ids = [label_map_dict[name] for name in annotations['type']]

    # tf.train.Example wraps a tf.train.Features instance, which in turn wraps
    # a dict that maps each feature name to a tf.train.Feature value.
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(path_bytes),
        'image/source_id': dataset_util.bytes_feature(path_bytes),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_png),
        'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm),
        'image/object/class/text': dataset_util.bytes_list_feature(class_names),
        'image/object/class/label': dataset_util.int64_list_feature(class_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def main(_):
    """Split the LISA annotations per image, then write the TFRecord files.

    The single positional argument is ignored. All settings for the
    conversion are read from the command-line flags.
    """
    convert_LISA_annotation_to_separated_annotations()

    conversion_args = {
        'data_dir': FLAGS.data_dir,
        'output_path': FLAGS.output_path,
        'classes_to_use': FLAGS.classes_to_use.split(','),
        'label_map_path': FLAGS.label_map_path,
        'validation_set_size': FLAGS.validation_set_size,
    }
    convert_LISA_to_tfrecords(**conversion_args)


# Script entry point.
if __name__ == '__main__':
    tf.app.run()
关于目标跟踪的了解
为了利用连续帧的信息,我查阅了相关资料,发现有目标跟踪这个研究领域,而且CVPR 2019的两篇oral都是目标跟踪这一领域的文章,于是拜读了一下这篇论文:《Fast Online Object Tracking and Segmentation: A Unifying Approach》。
这篇文章中,作者提出了一种同时实现视觉目标跟踪和半监督视频目标分割的方法,命名为SiamMask。该方法在损失函数中加入二值分割任务,以此改进目前流行的全卷积Siamese跟踪方法的训练流程。SiamMask只依赖起始帧的单个bounding box作为输入,就可以输出后续帧的目标分割掩膜和可旋转的bounding box,速度达到55fps。官方网址:http://www.robots.ox.ac.uk/~qwang/SiamMask/
目前只是粗浅地读了一下这篇论文,后面会再深入地读。
在本地的ubuntu系统上跑通了作者给出的测试代码,还没有仔细读,之后还要好好读代码,修改接口,重新训练,希望可以实现红绿灯的跟踪。
参考文献
- https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/preparing_inputs.md
- https://www.jianshu.com/p/b480e5fcb638
- http://www.robots.ox.ac.uk/~qwang/SiamMask/
- https://arxiv.org/pdf/1812.05050.pdf
- https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/running_locally.md