KITTI人、车物体检测
实现不是从零开始,而是将别人关键代码,复制进自己的项目
复现步骤:熟悉算法思想、介绍相关应用、分模块进行实战练习
v3引入了残差模块,并进一步加深了网络,改进后的网络有53个卷积层。借鉴了FPN的思想,从不同尺寸提取特征。
为了解决之前YOLO版本对小目标不敏感的问题,采用了3个不同尺度的特征图来进行目标检测。借鉴了FPN的思想,通过一系列的卷积层和上采样对各尺度的特征图进行融合。
项目步骤分析
1、数据集类型转换、KITTI转换成TFRecords
2、KITTI案例训练代码实现
3、图片和视频的检测代码实现
数据集转换为TFRecords文件
tensorflow就可以高效地读取和处理这些数据集,从而帮助我们更高效地进行大规模的模型训练
格式:TFRecord可以理解为一系列序列化的tf.train.Example元素所组成的列表文件。而每一个tf.train.Example又由若干个tf.train.Feature的字典组成
保存TFRecord格式:
读取该数据到内存、将该元素转换为tf.train.Example对象、将该tf.train.Example对象序列化为字符串,并通过预定义的tf.io.TFRecordWriter写入TFRecord
# Build the (filename, label) pairs: cats are label 0, dogs are label 1.
# NOTE: the original notes mixed three different list names
# (train_cat / train_cat_filename / train_dog_filenames); normalized here.
train_filenames = train_cat_filenames + train_dog_filenames
train_labels = [0] * len(train_cat_filenames) + [1] * len(train_dog_filenames)

# Iterate over every image, build the feature dict and an Example,
# then serialize it into the TFRecord file.
with tf.io.TFRecordWriter(tfrecord_file) as writer:
    for filename, label in zip(train_filenames, train_labels):
        # Read the still-encoded image bytes; they are stored verbatim.
        with open(filename, 'rb') as f:
            image = f.read()
        feature = {
            'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])),
            'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
        }
        # Build the Example from the feature dict.
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        # Serialize the Example and append it to the TFRecord file
        # (original had a typo: SerializeTpstring).
        writer.write(example.SerializeToString())
# Read the TFRecord file back as a dataset of serialized Examples.
raw_dataset = tf.data.TFRecordDataset(tfrecord_file)

# Schema for decoding each serialized tf.train.Example; it must mirror
# the feature dict used when the file was written (the original left
# this empty, so parsing would return nothing).
feature_description = {
    'image': tf.io.FixedLenFeature([], tf.string),
    'label': tf.io.FixedLenFeature([], tf.int64),
}

def parse_example(example_string):
    """Decode one serialized tf.train.Example into an (image, label) pair."""
    feature_dict = tf.io.parse_single_example(example_string, feature_description)
    # The stored bytes are an encoded JPEG; decode into an image tensor.
    feature_dict['image'] = tf.io.decode_jpeg(feature_dict['image'])
    return feature_dict['image'], feature_dict['label']

dataset = raw_dataset.map(parse_example)
for image, label in dataset:
    print(image, label)
KITTI数据集转换为TFRecords文件
步骤:进行读取主逻辑函数过程编写,指定需要传递的命令行参数、读取文件标注信息,过滤标注信息、进行构造example的feature字典
def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use, validation_set_size):
    """Convert the KITTI detection dataset into train/val TFRecord files.

    Args:
        data_dir: root directory of the KITTI dataset.
        output_path: prefix for the generated ``train.tfrecord`` / ``val.tfrecord``.
        classes_to_use: class names to keep when filtering annotations.
        validation_set_size: number of images routed to the validation set.
    """
    # 1. Locations of the training images and of the label files.
    image_dir = os.path.join(data_dir, 'data_object_image_2', 'training', 'image_2')
    annotation_dir = os.path.join(data_dir, 'training', 'label_2')
    # Fixed typo: original declared `train_writter` but wrote via `train_writer`.
    train_writer = tf.io.TFRecordWriter(output_path + 'train.tfrecord')
    val_writer = tf.io.TFRecordWriter(output_path + 'val.tfrecord')

    # List every image; the first `validation_set_size` go to the
    # validation split, the rest to the training split.
    images = sorted(os.listdir(image_dir))
    for idx, img_name in enumerate(images):
        # Image id, e.g. '000123.png' -> 123; the label file shares this id.
        # (Original split on '. ' with a stray space.)
        img_num = int(img_name.split('.')[0])
        # Parse the matching KITTI label file (original passed a
        # placeholder string here).
        img_anno = read_annotation_file(
            os.path.join(annotation_dir, '%06d.txt' % img_num))
        # Build the Example and route it to the proper split
        # (original wrote everything to train and never closed val_writer).
        image_path = os.path.join(image_dir, img_name)
        example = prepare_example(image_path, img_anno)
        if idx < validation_set_size:
            val_writer.write(example.SerializeToString())
        else:
            train_writer.write(example.SerializeToString())
    train_writer.close()
    val_writer.close()
读取标签文件的函数
def read_annotation_file(filename):
    """Parse one KITTI label file into a dict of numpy arrays.

    Each line of a KITTI label file describes one object:
    ``type truncated occluded alpha xmin ymin xmax ymax ...``

    Args:
        filename: path to the ``.txt`` label file.

    Returns:
        dict mapping field name -> numpy array with one entry per object.
    """
    with open(filename) as f:
        content = f.readlines()
    content = [x.strip().split(' ') for x in content]

    # Store the per-object columns as parallel arrays.
    anno = dict()
    anno['type'] = np.array([x[0].lower() for x in content])
    anno['truncated'] = np.array([float(x[1]) for x in content])
    anno['occluded'] = np.array([int(x[2]) for x in content])
    anno['alpha'] = np.array([float(x[3]) for x in content])
    anno['2d_bbox_left'] = np.array([float(x[4]) for x in content])
    anno['2d_bbox_top'] = np.array([float(x[5]) for x in content])
    anno['2d_bbox_right'] = np.array([float(x[6]) for x in content])
    # Bug fix: the original assigned '2d_bbox_right' twice from column 6;
    # the bottom edge of the box is column 7.
    anno['2d_bbox_bottom'] = np.array([float(x[7]) for x in content])
    return anno
构造example字典
def prepare_example(image_path, annotations):
    """Build a tf.train.Example for one image and its filtered annotations.

    Args:
        image_path: path to the encoded image file.
        annotations: dict produced by ``read_annotation_file`` with numpy
            arrays of per-object fields (the original signature said
            ``annotation`` but the body used ``annotations``).

    Returns:
        A populated ``tf.train.Example``.
    """
    # Read the encoded bytes once; they are stored verbatim in the record.
    with open(image_path, 'rb') as fid:
        encoded_png = fid.read()
    # Fixed typo: original wrote `encoded_png.io = io.BytesIO(...)` and then
    # referenced the undefined name `encoded_png_io`.
    encoded_png_io = io.BytesIO(encoded_png)
    image = pil.open(encoded_png_io)
    image = np.asarray(image)

    # Values required by the detection example protocol.
    key = hashlib.sha256(encoded_png).hexdigest()
    width = int(image.shape[1])
    height = int(image.shape[0])

    # Normalize box coordinates into [0, 1].
    xmin_norm = annotations['2d_bbox_left'] / float(width)
    ymin_norm = annotations['2d_bbox_top'] / float(height)
    xmax_norm = annotations['2d_bbox_right'] / float(width)
    ymax_norm = annotations['2d_bbox_bottom'] / float(height)

    # Difficulty flags and byte-encoded class names.
    difficult_obj = [0] * len(xmin_norm)
    classes_text = [x.encode('utf8') for x in annotations['type']]

    # tf.train.Example is called with parentheses, not braces
    # (original `Example{...}` is invalid syntax).
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
        'image/width': tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
        'image/key/sha256': tf.train.Feature(bytes_list=tf.train.BytesList(value=[key.encode('utf8')])),
        'image/encoded': tf.train.Feature(bytes_list=tf.train.BytesList(value=[encoded_png])),
        'image/object/bbox/xmin': tf.train.Feature(float_list=tf.train.FloatList(value=xmin_norm)),
        'image/object/bbox/ymin': tf.train.Feature(float_list=tf.train.FloatList(value=ymin_norm)),
        'image/object/bbox/xmax': tf.train.Feature(float_list=tf.train.FloatList(value=xmax_norm)),
        'image/object/bbox/ymax': tf.train.Feature(float_list=tf.train.FloatList(value=ymax_norm)),
        'image/object/difficult': tf.train.Feature(int64_list=tf.train.Int64List(value=difficult_obj)),
        'image/object/class/text': tf.train.Feature(bytes_list=tf.train.BytesList(value=classes_text)),
    }))
    return example
训练
# Build the input pipeline: load, shuffle, batch, preprocess, prefetch.
train_dataset = dataset.load_tfrecord_dataset(dataset, classes)
train_dataset = train_dataset.shuffle(buffer_size=1024)
# batch() and map() require arguments (original called them empty).
# NOTE(review): the exact transforms come from the yolov3-tf2 dataset
# module — confirm against that project's train.py.
train_dataset = train_dataset.batch(args.batch_size)
train_dataset = train_dataset.map(lambda x, y: (
    dataset.transform_images(x, args.size),
    dataset.transform_targets(y, yolo_anchors, yolo_anchor_masks, args.size)))
train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# The 9 anchor boxes (w, h) from the YOLOv3 paper, normalized by the
# 416x416 network input size.
yolo_anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
                        np.float32) / 416
# Which anchors each of the 3 output scales uses (large -> small objects).
yolo_anchor_masks = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]])

# Consistent class name: the test section uses YoloV3 (original: Yolov3).
model = YoloV3(args.size, training=True, classes=args.num_classes)
# Fixed: the loss below indexes `anchors[mask]`, but the original bound
# the singular names `anchor` / `anchor_masks`.
anchors = yolo_anchors
anchor_masks = yolo_anchor_masks

# `lr=` is the deprecated Keras alias; use `learning_rate=`.
optimizer = tf.keras.optimizers.Adam(learning_rate=args.learning_rate)
# One loss per output scale, each bound to its subset of anchors.
loss = [YoloLoss(anchors[mask], classes=args.num_classes)
        for mask in anchor_masks]
model.compile(optimizer=optimizer, loss=loss)

callbacks = [
    EarlyStopping(patience=3, verbose=1),
    ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                    verbose=1, save_weights_only=True),
    # Fixed truncated name: `ensorBoard` -> TensorBoard.
    TensorBoard(log_dir='logs'),
]
model.fit(train_dataset,
          epochs=args.epochs,
          callbacks=callbacks,
          validation_data=val_dataset)
测试
# Rebuild the network and restore the trained weights for inference.
yolo = YoloV3(classes=args.num_classes)
yolo.load_weights(args.weights)

# The class file holds one class name per line.
class_names = [line.strip() for line in open(args.classes).readlines()]

# Decode the image, add a batch dimension, and resize it for the network.
img_raw = tf.image.decode_image(open(args.image, 'rb').read(), channels=3)
img_in = tf.expand_dims(img_raw, 0)
img_in = transform_images(img_in, args.size)

# Run detection, draw the boxes on the original image, and save the result.
boxes, scores, classes, nums = yolo(img_in)
out_img = cv2.imread(args.image)
out_img = draw_outputs(out_img, (boxes, scores, classes, nums), class_names)
cv2.imwrite(args.output, out_img)