TensorFlow 版 YOLO v1 测试(推理)代码梳理,相信看完之后,你会对 YOLO v1 的整个检测流程一目了然。
硬件:NVIDIA-GTX1080
软件:Windows7、python3.6.5、tensorflow-gpu-1.4.0
一、基础知识
一图看懂:https://download.csdn.net/download/samylee/10879852
二、代码展示(做了详细注释)
1、./test.py
import cv2
import numpy as np
import tensorflow as tf
import yolo.config as cfg
from yolo.yolo_net import YOLONet
class Detector(object):
    """Run a YOLO v1 network on an image and decode its output into boxes.

    The network output for one image is a flat vector laid out as
    ``[class probabilities | box confidences | box coordinates]``; the two
    split points are ``boundary1`` and ``boundary2``.
    """

    def __init__(self, net, weight_file):
        """Read decoding parameters from ``cfg`` and restore the checkpoint.

        Args:
            net: Network object exposing ``images`` (input placeholder) and
                ``logits`` (output tensor); both are used by
                ``detect_from_cvmat``.
            weight_file: Checkpoint path handed to ``tf.train.Saver.restore``.
        """
        self.net = net
        self.weights_file = weight_file

        # Class names and their count.
        self.classes = cfg.CLASSES
        self.num_class = len(self.classes)
        # Side length (pixels) of the square network input.
        self.image_size = cfg.IMAGE_SIZE
        # Number of grid cells along each image side.
        self.cell_size = cfg.CELL_SIZE
        # Number of boxes predicted by each grid cell.
        self.boxes_per_cell = cfg.BOXES_PER_CELL
        # Minimum class-specific confidence for a box to be kept.
        self.threshold = cfg.THRESHOLD
        # IOU above which the lower-scored box is suppressed by NMS.
        self.iou_threshold = cfg.IOU_THRESHOLD

        # output[:boundary1]          -> per-cell conditional class probabilities
        # output[boundary1:boundary2] -> per-box confidences
        # output[boundary2:]          -> per-box coordinates (x, y, w, h)
        cells = self.cell_size * self.cell_size
        self.boundary1 = cells * self.num_class
        self.boundary2 = self.boundary1 + cells * self.boxes_per_cell

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

        print('Restoring weights from: ' + self.weights_file)
        self.saver = tf.train.Saver()
        self.saver.restore(self.sess, self.weights_file)

    def draw_result(self, img, result):
        """Draw every detection (box, label, score) onto ``img`` in place.

        Args:
            img: Image array that OpenCV drawing functions can write to.
            result: Iterable of ``[label, x_mid, y_mid, width, height, prob]``.
        """
        # OpenCV 3+ exposes LINE_AA, OpenCV 2 exposes CV_AA. Probing the
        # attribute avoids a lexicographic comparison of version strings.
        line_type = cv2.LINE_AA if hasattr(cv2, 'LINE_AA') else cv2.CV_AA

        for det in result:
            # Corners are derived from the centre and the half extents:
            #   xmin = xmid - width/2    ymin = ymid - height/2
            #   xmax = xmid + width/2    ymax = ymid + height/2
            x = int(det[1])
            y = int(det[2])
            w = int(det[3] / 2)
            h = int(det[4] / 2)
            cv2.rectangle(img, (x - w, y - h), (x + w, y + h), (0, 255, 0), 2)
            # Filled grey strip above the box that carries the caption.
            cv2.rectangle(img, (x - w, y - h - 20),
                          (x + w, y - h), (125, 125, 125), -1)
            cv2.putText(
                img, det[0] + ' : %.2f' % det[5],
                (x - w + 5, y - h - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                (0, 0, 0), 1, line_type)

    def detect(self, img):
        """Detect objects in a BGR image and return boxes in image pixels.

        Args:
            img: Image array of shape ``(height, width, channels)``.

        Returns:
            List of ``[label, x_mid, y_mid, width, height, prob]`` scaled
            back to the size of ``img``.
        """
        img_h, img_w, _ = img.shape

        # Pre-processing: resize to the network input, BGR -> RGB, and map
        # pixel values from [0, 255] to [-1, 1].
        inputs = cv2.resize(img, (self.image_size, self.image_size))
        inputs = cv2.cvtColor(inputs, cv2.COLOR_BGR2RGB).astype(np.float32)
        inputs = (inputs / 255.0) * 2.0 - 1.0
        inputs = np.reshape(inputs, (1, self.image_size, self.image_size, 3))

        result = self.detect_from_cvmat(inputs)[0]

        # Boxes are expressed in network-input pixels; undo the resize.
        scale_x = 1.0 * img_w / self.image_size
        scale_y = 1.0 * img_h / self.image_size
        for det in result:
            det[1] *= scale_x
            det[2] *= scale_y
            det[3] *= scale_x
            det[4] *= scale_y
        return result

    def detect_from_cvmat(self, inputs):
        """Run the network on a pre-processed batch and decode every image.

        Args:
            inputs: Batch fed to ``self.net.images``.

        Returns:
            One detection list (see ``interpret_output``) per batch item.
        """
        net_output = self.sess.run(self.net.logits,
                                   feed_dict={self.net.images: inputs})
        return [self.interpret_output(net_output[i])
                for i in range(net_output.shape[0])]

    def interpret_output(self, output):
        """Decode one flat network output vector into a list of detections.

        Steps: split the vector, convert box coordinates to network-input
        pixels, compute class-specific confidences, threshold them, sort by
        score and apply non-maximum suppression.

        Args:
            output: 1-D sequence holding class probabilities, box
                confidences and box coordinates, in that order. It is not
                modified.

        Returns:
            List of ``[label, x_mid, y_mid, width, height, prob]`` in
            descending score order, in network-input pixel units.
        """
        output = np.asarray(output)
        cells = self.cell_size

        # Conditional class probabilities: (cells, cells, num_class).
        class_probs = np.reshape(
            output[0:self.boundary1], (cells, cells, self.num_class))
        # Box confidences: (cells, cells, boxes_per_cell).
        scales = np.reshape(
            output[self.boundary1:self.boundary2],
            (cells, cells, self.boxes_per_cell))
        # Box coordinates: (cells, cells, boxes_per_cell, 4). np.reshape
        # returns a view, so copy before decoding in place; otherwise the
        # caller's array would be overwritten.
        boxes = np.array(np.reshape(
            output[self.boundary2:], (cells, cells, self.boxes_per_cell, 4)))

        # x_real = (x_out + column index) * image_size / cell_size
        # y_real = (y_out + row index)    * image_size / cell_size
        grid = np.arange(cells)
        boxes[:, :, :, 0] += grid.reshape(1, cells, 1)
        boxes[:, :, :, 1] += grid.reshape(cells, 1, 1)
        cell_pixels = 1.0 * self.image_size / self.cell_size
        boxes[:, :, :, :2] = boxes[:, :, :, :2] * cell_pixels
        # w_real = square(w_out) * image_size
        # h_real = square(h_out) * image_size
        boxes[:, :, :, 2:] = np.square(boxes[:, :, :, 2:]) * self.image_size

        # Class-specific confidence = class probability * box confidence,
        # shape (cells, cells, boxes_per_cell, num_class).
        probs = (class_probs[:, :, np.newaxis, :] *
                 scales[:, :, :, np.newaxis]).astype(np.float64)

        # Keep every (cell, box, class) entry whose confidence reaches the
        # threshold. np.nonzero and boolean indexing both walk the array in
        # row-major order, so the four index arrays line up with
        # probs_filtered element by element.
        keep = probs >= self.threshold
        rows, cols, box_ids, class_ids = np.nonzero(keep)
        probs_filtered = probs[keep]
        boxes_filtered = boxes[rows, cols, box_ids]
        # The class of each kept entry is its own index on the class axis.
        # (Taking argmax over the boolean mask would label every entry of a
        # box with the first class above the threshold.)
        classes_filtered = class_ids

        # Sort everything by descending confidence.
        order = np.argsort(probs_filtered)[::-1]
        probs_filtered = probs_filtered[order]
        boxes_filtered = boxes_filtered[order]
        classes_filtered = classes_filtered[order]

        # Greedy non-maximum suppression: a surviving box zeroes the score
        # of every lower-ranked box that overlaps it too much.
        # NOTE: suppression is class-agnostic (classes are not compared);
        # per-class NMS may be preferable.
        count = len(boxes_filtered)
        for i in range(count):
            if probs_filtered[i] == 0:
                continue
            for j in range(i + 1, count):
                overlap = self.iou(boxes_filtered[i], boxes_filtered[j])
                if overlap > self.iou_threshold:
                    probs_filtered[j] = 0.0

        survivors = probs_filtered > 0.0
        boxes_filtered = boxes_filtered[survivors]
        probs_filtered = probs_filtered[survivors]
        classes_filtered = classes_filtered[survivors]

        return [
            [self.classes[class_id], box[0], box[1], box[2], box[3], prob]
            for class_id, box, prob in zip(
                classes_filtered, boxes_filtered, probs_filtered)
        ]

    def iou(self, box1, box2):
        """Return the intersection-over-union of two centre-format boxes.

        Args:
            box1: ``(x_mid, y_mid, width, height)``.
            box2: ``(x_mid, y_mid, width, height)``.

        Returns:
            Intersection area divided by union area; ``0.0`` when the union
            has no area (for example two zero-sized boxes).
        """
        overlap_w = min(box1[0] + 0.5 * box1[2], box2[0] + 0.5 * box2[2]) - \
            max(box1[0] - 0.5 * box1[2], box2[0] - 0.5 * box2[2])
        overlap_h = min(box1[1] + 0.5 * box1[3], box2[1] + 0.5 * box2[3]) - \
            max(box1[1] - 0.5 * box1[3], box2[1] - 0.5 * box2[3])
        if overlap_w < 0 or overlap_h < 0:
            inter = 0
        else:
            inter = overlap_w * overlap_h
        union = box1[2] * box1[3] + box2[2] * box2[3] - inter
        if union <= 0:
            # Degenerate boxes: avoid dividing by zero.
            return 0.0
        return inter / union

    def image_detector(self, imname, wait=0):
        """Detect objects in an image file and show the annotated image.

        Args:
            imname: Path of the image to read.
            wait: Delay passed to ``cv2.waitKey``.

        Raises:
            IOError: If the image cannot be read.
        """
        image = cv2.imread(imname)
        if image is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError('Cannot read image: ' + str(imname))

        result = self.detect(image)
        self.draw_result(image, result)
        cv2.imshow('Image', image)
        cv2.waitKey(wait)
def main():
    """Build the inference network, restore the weights and run one image."""
    # False -> the network is built with is_training disabled.
    detector = Detector(YOLONet(False), "model/YOLO_small.ckpt")
    detector.image_detector('person.jpg')


if __name__ == '__main__':
    main()
2、./yolo/config.py
import os
# Object category names; the position in this list is the class index.
CLASSES = [
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
]

# Side length (pixels) of the square network input.
IMAGE_SIZE = 448

# Number of grid cells along each image side.
CELL_SIZE = 7

# Number of boxes predicted by each grid cell.
BOXES_PER_CELL = 2

# Negative-slope coefficient of the leaky ReLU activation.
ALPHA = 0.1

# Minimum class-specific confidence for a detection to be kept.
THRESHOLD = 0.2

# IOU above which overlapping detections are suppressed.
IOU_THRESHOLD = 0.5
3、./yolo/yolo_net.py
import numpy as np
import tensorflow as tf
import yolo.config as cfg
slim = tf.contrib.slim
class YOLONet(object):
    """YOLO v1 network graph: input placeholder plus the flat output tensor."""

    def __init__(self, is_training=True):
        """Read the configuration and build the graph.

        Args:
            is_training: Forwarded to ``build_network``, where it is passed
                to the dropout layer.
        """
        # Class names and their count.
        self.classes = cfg.CLASSES
        self.num_class = len(self.classes)
        # Side length (pixels) of the square network input.
        self.image_size = cfg.IMAGE_SIZE
        # Number of grid cells along each image side.
        self.cell_size = cfg.CELL_SIZE
        # Number of boxes predicted by each grid cell.
        self.boxes_per_cell = cfg.BOXES_PER_CELL
        # Length of the flat output vector: every cell predicts num_class
        # class scores plus 5 values for each of its boxes.
        values_per_cell = self.num_class + self.boxes_per_cell * 5
        self.output_size = (self.cell_size * self.cell_size) * values_per_cell
        # Leaky ReLU coefficient.
        self.alpha = cfg.ALPHA
        # Column index of every cell, repeated for each row and each box:
        # shape (cell_size, cell_size, boxes_per_cell), offset[i, j, b] == j.
        columns = np.arange(self.cell_size).reshape(1, self.cell_size, 1)
        self.offset = np.tile(
            columns, (self.cell_size, 1, self.boxes_per_cell))

        # Input batch of images (used for both training and testing).
        self.images = tf.placeholder(
            tf.float32, [None, self.image_size, self.image_size, 3],
            name='images')
        # Flat network output, one vector of output_size values per image.
        self.logits = self.build_network(
            self.images, num_outputs=self.output_size, alpha=self.alpha,
            is_training=is_training)

    # Call conventions used below:
    #   conv2d(inputs, output_size, ksize, strides=[1, 1], padding='SAME')
    #   max_pool2d(inputs, ksize, strides=[2, 2])
    def build_network(self, images, num_outputs, alpha, keep_prob=0.5,
                      is_training=True, scope='yolo'):
        """Assemble the convolutional backbone and the fully connected head.

        Args:
            images: Input image batch tensor.
            num_outputs: Width of the final fully connected layer.
            alpha: Leaky ReLU coefficient used by conv and FC layers.
            keep_prob: Keep probability of the dropout layer.
            is_training: Passed to the dropout layer.
            scope: Variable scope wrapping the whole network.

        Returns:
            Output tensor of the last fully connected layer (no activation).
        """
        # Layers conv_4 .. conv_26 in order. A (filters, kernel) tuple is a
        # stride-1 SAME convolution; 'pool' is a 2x2 max pooling. The scope
        # suffix is the running layer number, starting at 4.
        middle_layers = [
            (192, 3), 'pool',
            (128, 1), (256, 3), (256, 1), (512, 3), 'pool',
            (256, 1), (512, 3), (256, 1), (512, 3),
            (256, 1), (512, 3), (256, 1), (512, 3),
            (512, 1), (1024, 3), 'pool',
            (512, 1), (1024, 3), (512, 1), (1024, 3), (1024, 3),
        ]

        with tf.variable_scope(scope):
            # Shared defaults for every conv2d / fully_connected layer.
            with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected],
                    activation_fn=leaky_relu(alpha),
                    weights_regularizer=slim.l2_regularizer(0.0005),
                    weights_initializer=tf.truncated_normal_initializer(
                        0.0, 0.01)):
                # (batch, 448, 448, 3) -> (batch, 454, 454, 3)
                net = tf.pad(
                    images, np.array([[0, 0], [3, 3], [3, 3], [0, 0]]),
                    name='pad_1')
                net = slim.conv2d(
                    net, 64, 7, 2, padding='VALID', scope='conv_2')
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_3')

                for number, spec in enumerate(middle_layers, 4):
                    if spec == 'pool':
                        net = slim.max_pool2d(
                            net, 2, padding='SAME', scope='pool_%d' % number)
                    else:
                        filters, kernel = spec
                        net = slim.conv2d(
                            net, filters, kernel, scope='conv_%d' % number)

                # Explicit padding followed by a stride-2 VALID convolution.
                net = tf.pad(
                    net, np.array([[0, 0], [1, 1], [1, 1], [0, 0]]),
                    name='pad_27')
                net = slim.conv2d(
                    net, 1024, 3, 2, padding='VALID', scope='conv_28')
                net = slim.conv2d(net, 1024, 3, scope='conv_29')
                net = slim.conv2d(net, 1024, 3, scope='conv_30')

                # n h w c -> n c h w, then flatten.
                net = tf.transpose(net, [0, 3, 1, 2], name='trans_31')
                net = slim.flatten(net, scope='flat_32')
                net = slim.fully_connected(net, 512, scope='fc_33')
                net = slim.fully_connected(net, 4096, scope='fc_34')
                # Dropout is only active when is_training is true.
                net = slim.dropout(
                    net, keep_prob=keep_prob, is_training=is_training,
                    scope='dropout_35')
                # Output layer has no activation: (batch, 1470) with the
                # default configuration.
                net = slim.fully_connected(
                    net, num_outputs, activation_fn=None, scope='fc_36')
        return net
def leaky_relu(alpha):
    """Return an activation function applying leaky ReLU with slope ``alpha``.

    The returned callable takes a tensor and forwards it to
    ``tf.nn.leaky_relu``; it is meant to be passed as ``activation_fn``.
    """
    def activation(inputs):
        activated = tf.nn.leaky_relu(inputs, alpha=alpha, name='leaky_relu')
        return activated

    return activation
三、模型及测试图像下载
链接:https://pan.baidu.com/s/1517r5zxY01vQgrEJUqGEfw
提取码:xh45
下载后模型保存至./model/目录下,图像保存至当前路径下
四、结果展示
五、参考
https://github.com/hizhangp/yolo_tensorflow
任何问题请加唯一QQ2258205918(名称samylee)!
或唯一VX:samylee_csdn