二、模型训练
代码链接:https://github.com/YunYang1994/tensorflow-yolov3
本章是复现以上的代码,并自己做了些其他修改,比如数据处理、后处理等,这里只贴出修改过的一些代码,其他代码可从上面链接下载。
darknet转ckpt、pb:https://blog.csdn.net/heiheiya/article/details/91437196
1.convert_weight.py
下载预训练模型
wget https://github.com/YunYang1994/tensorflow-yolov3/releases/download/v1.0/yolov3_coco.tar.gz
转换预训练模型:
#! /usr/bin/env python
# coding=utf-8
import argparse
import tensorflow as tf
from core.yolov3 import YOLOV3
from core.config import cfg
# Command-line interface: the only switch is --train_from_coco. When it is set,
# the filters below skip some variables on both sides before names are paired.
parser = argparse.ArgumentParser()
parser.add_argument("--train_from_coco", action='store_true')
flag = parser.parse_args()

org_weights_path = cfg.YOLO.ORIGINAL_WEIGHT
cur_weights_path = cfg.YOLO.DEMO_WEIGHT

# Name components that are skipped when --train_from_coco is set:
# the first path component of a variable in the current graph, and the
# second-to-last component of a 'yolo-v3' variable in the original checkpoint.
preserve_cur_names = ['conv_sbbox', 'conv_mbbox', 'conv_lbbox']
preserve_org_names = ['Conv_6', 'Conv_14', 'Conv_22']

# --- 1. Collect [name, shape] for the variables of the original checkpoint.
org_weights_mess = []
# NOTE(review): as_default() returns a context manager; it is not entered with
# `with` here, so this line presumably has no effect - confirm before removing.
tf.Graph().as_default()
load = tf.train.import_meta_graph(org_weights_path + '.meta')
with tf.Session() as sess:
    load.restore(sess, org_weights_path)
    for var in tf.global_variables():
        var_name = var.op.name
        var_name_mess = str(var_name).split('/')
        var_shape = var.shape
        if flag.train_from_coco:
            if (var_name_mess[-1] not in ['weights', 'gamma', 'beta', 'moving_mean', 'moving_variance']) or \
                    (var_name_mess[1] == 'yolo-v3' and (var_name_mess[-2] in preserve_org_names)):
                continue
        org_weights_mess.append([var_name, var_shape])
        print("=> " + str(var_name).ljust(50), var_shape)
print()
tf.reset_default_graph()

# --- 2. Build the current model and collect [name, shape] of its variables.
cur_weights_mess = []
tf.Graph().as_default()  # NOTE(review): same remark as above.
with tf.name_scope('input'):
    input_data = tf.placeholder(dtype=tf.float32, shape=(1, 416, 416, 3), name='input_data')
    training = tf.placeholder(dtype=tf.bool, name='trainable')
model = YOLOV3(input_data, training)
for var in tf.global_variables():
    var_name = var.op.name
    var_name_mess = str(var_name).split('/')
    var_shape = var.shape
    print(var_name_mess[0])
    if flag.train_from_coco:
        if var_name_mess[0] in preserve_cur_names:
            continue
    cur_weights_mess.append([var_name, var_shape])
    print("=> " + str(var_name).ljust(50), var_shape)

# --- 3. Pair the two lists positionally; counts and shapes must agree.
org_weights_num = len(org_weights_mess)
cur_weights_num = len(cur_weights_mess)
if cur_weights_num != org_weights_num:
    raise RuntimeError(
        'Variable count mismatch: current graph has %d, original checkpoint has %d'
        % (cur_weights_num, org_weights_num))

print('=> Number of weights that will rename:\t%d' % cur_weights_num)
cur_to_org_dict = {}
for org_entry, cur_entry in zip(org_weights_mess, cur_weights_mess):
    org_name, org_shape = org_entry
    cur_name, cur_shape = cur_entry
    if cur_shape != org_shape:
        print(org_entry)
        print(cur_entry)
        raise RuntimeError(
            'Shape mismatch between %s %s and %s %s'
            % (org_name, org_shape, cur_name, cur_shape))
    cur_to_org_dict[cur_name] = org_name
    print("=> " + str(cur_name).ljust(50) + ' : ' + org_name)

# --- 4. One saver reads variables under their original names, the other
#        writes every global variable under its current name.
with tf.name_scope('load_save'):
    name_to_var_dict = {var.op.name: var for var in tf.global_variables()}
    restore_dict = {cur_to_org_dict[cur_name]: name_to_var_dict[cur_name] for cur_name in cur_to_org_dict}
    load = tf.train.Saver(restore_dict)
    save = tf.train.Saver(tf.global_variables())
    for var in tf.global_variables():
        print("=> " + var.op.name)

# --- 5. Initialise, restore the mapped variables, and write the new checkpoint.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print('=> Restoring weights from:\t %s' % org_weights_path)
    load.restore(sess, org_weights_path)
    save.save(sess, cur_weights_path)
tf.reset_default_graph()
注意:转换模型时.names文件必须和训练的模型匹配
2.core/config.py
#! /usr/bin/env python
# coding=utf-8
from easydict import EasyDict as edict
# Root configuration object; sub-sections are attached as attributes below.
__C = edict()
# Consumers can get config by: from config import cfg
cfg = __C
# YOLO options
__C.YOLO = edict()
# Set the class name
# Paths to the class-name list and the anchor file of the custom dataset.
__C.YOLO.CLASSES = "./data/img/my_data.names"
__C.YOLO.ANCHORS = "./data/img/yolo_anchors.txt"
# __C.YOLO.CLASSES = "./data/classes/coco.names"
# __C.YOLO.ANCHORS = "./data/anchors/basline_anchors.txt"
# Decay passed to tf.train.ExponentialMovingAverage in train.py.
__C.YOLO.MOVING_AVE_DECAY = 0.9995
__C.YOLO.STRIDES = [8, 16, 32]
__C.YOLO.ANCHOR_PER_SCALE = 3
__C.YOLO.IOU_LOSS_THRESH = 0.5
__C.YOLO.UPSAMPLE_METHOD = "resize"
# convert_weight.py reads ORIGINAL_WEIGHT and writes DEMO_WEIGHT.
__C.YOLO.ORIGINAL_WEIGHT = "./checkpoint/yolov3_coco.ckpt"
__C.YOLO.DEMO_WEIGHT = "./checkpoint/demo.ckpt"
# Train options
__C.TRAIN = edict()
__C.TRAIN.ANNOT_PATH = "./data/img/train.txt"
__C.TRAIN.BATCH_SIZE = 1
__C.TRAIN.INPUT_SIZE = [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
__C.TRAIN.DATA_AUG = True
# Learning-rate schedule endpoints and warm-up length used by train.py.
__C.TRAIN.LEARN_RATE_INIT = 1e-4
__C.TRAIN.LEARN_RATE_END = 1e-6
__C.TRAIN.WARMUP_EPOCHS = 2
# NOTE: the key is spelled "FISRT" on purpose here - train.py reads it under
# exactly this name, so it must not be corrected in isolation.
__C.TRAIN.FISRT_STAGE_EPOCHS = 20
__C.TRAIN.SECOND_STAGE_EPOCHS = 50
# Checkpoint that train.py tries to restore before training starts.
__C.TRAIN.INITIAL_WEIGHT = "./checkpoint/demo.ckpt"
# TEST options
__C.TEST = edict()
__C.TEST.ANNOT_PATH = "./data/img/test.txt"
__C.TEST.BATCH_SIZE = 1
__C.TEST.INPUT_SIZE = 416
__C.TEST.DATA_AUG = False
__C.TEST.WRITE_IMAGE = True
__C.TEST.WRITE_IMAGE_PATH = "./data/img/xml/"
__C.TEST.WRITE_IMAGE_SHOW_LABEL = True
__C.TEST.WEIGHT_FILE = "./checkpoint/0/yolov3_test_loss=2425.1296.ckpt-21"
__C.TEST.SHOW_LABEL = True
__C.TEST.SCORE_THRESHOLD = 0.3
__C.TEST.IOU_THRESHOLD = 0.45
3.train.py
#! /usr/bin/env python
# coding=utf-8
import os
import time
import shutil
import numpy as np
import tensorflow as tf
import core.utils as utils
from tqdm import tqdm
from core.dataset import Dataset
from core.yolov3 import YOLOV3
from core.config import cfg
class YoloTrain(object):
    """Two-stage YOLOv3 training driver built on the TF1 graph/session API.

    ``__init__`` builds the complete graph (inputs, loss, learning-rate
    schedule, two train ops, savers, summaries); ``train`` runs the epochs,
    evaluates on the test set after each one and writes a checkpoint.
    """

    def __init__(self):
        """Read settings from ``cfg`` and construct the training graph."""
        self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
        self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
        self.num_classes = len(self.classes)
        self.learn_rate_init = cfg.TRAIN.LEARN_RATE_INIT
        self.learn_rate_end = cfg.TRAIN.LEARN_RATE_END
        self.first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS
        self.second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
        self.warmup_periods = cfg.TRAIN.WARMUP_EPOCHS
        self.initial_weight = cfg.TRAIN.INITIAL_WEIGHT
        self.time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
        self.moving_ave_decay = cfg.YOLO.MOVING_AVE_DECAY
        self.max_bbox_per_scale = 150
        self.train_logdir = "./data/log/train"
        self.trainset = Dataset('train')
        self.testset = Dataset('test')
        self.steps_per_period = len(self.trainset)
        self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

        with tf.name_scope('define_input'):
            self.input_data = tf.placeholder(dtype=tf.float32, name='input_data')
            self.label_sbbox = tf.placeholder(dtype=tf.float32, name='label_sbbox')
            self.label_mbbox = tf.placeholder(dtype=tf.float32, name='label_mbbox')
            self.label_lbbox = tf.placeholder(dtype=tf.float32, name='label_lbbox')
            self.true_sbboxes = tf.placeholder(dtype=tf.float32, name='sbboxes')
            self.true_mbboxes = tf.placeholder(dtype=tf.float32, name='mbboxes')
            self.true_lbboxes = tf.placeholder(dtype=tf.float32, name='lbboxes')
            self.trainable = tf.placeholder(dtype=tf.bool, name='training')

        with tf.name_scope("define_loss"):
            self.model = YOLOV3(self.input_data, self.trainable)
            # Snapshot taken before optimizer/EMA variables exist, so the
            # loader below only covers the network variables.
            self.net_var = tf.global_variables()
            self.giou_loss, self.conf_loss, self.prob_loss = self.model.compute_loss(
                self.label_sbbox, self.label_mbbox, self.label_lbbox,
                self.true_sbboxes, self.true_mbboxes, self.true_lbboxes)
            self.loss = self.giou_loss + self.conf_loss + self.prob_loss

        with tf.name_scope('learn_rate'):
            self.global_step = tf.Variable(1.0, dtype=tf.float64, trainable=False, name='global_step')
            warmup_steps = tf.constant(self.warmup_periods * self.steps_per_period,
                                       dtype=tf.float64, name='warmup_steps')
            train_steps = tf.constant((self.first_stage_epochs + self.second_stage_epochs) * self.steps_per_period,
                                      dtype=tf.float64, name='train_steps')
            # Linear warm-up up to learn_rate_init, then cosine decay down to
            # learn_rate_end over the remaining steps.
            self.learn_rate = tf.cond(
                pred=self.global_step < warmup_steps,
                true_fn=lambda: self.global_step / warmup_steps * self.learn_rate_init,
                false_fn=lambda: self.learn_rate_end + 0.5 * (self.learn_rate_init - self.learn_rate_end) *
                                 (1 + tf.cos(
                                     (self.global_step - warmup_steps) / (train_steps - warmup_steps) * np.pi))
            )
            global_step_update = tf.assign_add(self.global_step, 1.0)

        with tf.name_scope("define_weight_decay"):
            moving_ave = tf.train.ExponentialMovingAverage(self.moving_ave_decay).apply(tf.trainable_variables())

        with tf.name_scope("define_first_stage_train"):
            # Stage one optimises only the three detection-head layers.
            self.first_stage_trainable_var_list = []
            for var in tf.trainable_variables():
                var_name = var.op.name
                var_name_mess = str(var_name).split('/')
                if var_name_mess[0] in ['conv_sbbox', 'conv_mbbox', 'conv_lbbox']:
                    self.first_stage_trainable_var_list.append(var)

            first_stage_optimizer = tf.train.AdamOptimizer(self.learn_rate).minimize(
                self.loss, var_list=self.first_stage_trainable_var_list)
            with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                with tf.control_dependencies([first_stage_optimizer, global_step_update]):
                    with tf.control_dependencies([moving_ave]):
                        self.train_op_with_frozen_variables = tf.no_op()

        with tf.name_scope("define_second_stage_train"):
            # Stage two optimises every trainable variable.
            second_stage_trainable_var_list = tf.trainable_variables()
            second_stage_optimizer = tf.train.AdamOptimizer(self.learn_rate).minimize(
                self.loss, var_list=second_stage_trainable_var_list)
            with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                with tf.control_dependencies([second_stage_optimizer, global_step_update]):
                    with tf.control_dependencies([moving_ave]):
                        self.train_op_with_all_variables = tf.no_op()

        with tf.name_scope('loader_and_saver'):
            self.loader = tf.train.Saver(self.net_var)
            self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

        with tf.name_scope('summary'):
            tf.summary.scalar("learn_rate", self.learn_rate)
            tf.summary.scalar("giou_loss", self.giou_loss)
            tf.summary.scalar("conf_loss", self.conf_loss)
            tf.summary.scalar("prob_loss", self.prob_loss)
            tf.summary.scalar("total_loss", self.loss)
            logdir = "./data/log/"
            # Start from an empty log directory; makedirs also creates a
            # missing parent ("./data"), which plain mkdir would not.
            if os.path.exists(logdir):
                shutil.rmtree(logdir)
            os.makedirs(logdir)
            self.write_op = tf.summary.merge_all()
            self.summary_writer = tf.summary.FileWriter(logdir, graph=self.sess.graph)

    def train(self):
        """Run all epochs, evaluating and checkpointing after each one.

        If the initial checkpoint cannot be restored, the frozen first stage
        is skipped and every variable is trained from the start.
        """
        self.sess.run(tf.global_variables_initializer())
        try:
            print('=> Restoring weights from: %s ... ' % self.initial_weight)
            self.loader.restore(self.sess, self.initial_weight)
        except Exception as err:
            # `Exception` (not a bare except) so Ctrl-C / SystemExit still
            # abort the run instead of silently starting from scratch.
            print('=> %s does not exist !!!' % self.initial_weight)
            print('=> Restore failed with: %r' % (err,))
            print('=> Now it starts to train YOLOV3 from scratch ...')
            self.first_stage_epochs = 0

        # Make sure the checkpoint directory exists before the first epoch,
        # so a missing folder is not discovered only after an epoch of work.
        ckpt_dir = "./checkpoint/0"
        if not os.path.isdir(ckpt_dir):
            os.makedirs(ckpt_dir)

        for epoch in range(1, 1 + self.first_stage_epochs + self.second_stage_epochs):
            if epoch <= self.first_stage_epochs:
                train_op = self.train_op_with_frozen_variables
            else:
                train_op = self.train_op_with_all_variables

            pbar = tqdm(self.trainset)
            train_epoch_loss, test_epoch_loss = [], []

            for train_data in pbar:
                _, summary, train_step_loss, global_step_val = self.sess.run(
                    [train_op, self.write_op, self.loss, self.global_step], feed_dict={
                        self.input_data: train_data[0],
                        self.label_sbbox: train_data[1],
                        self.label_mbbox: train_data[2],
                        self.label_lbbox: train_data[3],
                        self.true_sbboxes: train_data[4],
                        self.true_mbboxes: train_data[5],
                        self.true_lbboxes: train_data[6],
                        self.trainable: True,
                    })
                train_epoch_loss.append(train_step_loss)
                self.summary_writer.add_summary(summary, global_step_val)
                pbar.set_description("train loss: %.2f" % train_step_loss)

            for test_data in self.testset:
                test_step_loss = self.sess.run(self.loss, feed_dict={
                    self.input_data: test_data[0],
                    self.label_sbbox: test_data[1],
                    self.label_mbbox: test_data[2],
                    self.label_lbbox: test_data[3],
                    self.true_sbboxes: test_data[4],
                    self.true_mbboxes: test_data[5],
                    self.true_lbboxes: test_data[6],
                    self.trainable: False,
                })
                test_epoch_loss.append(test_step_loss)

            train_epoch_loss, test_epoch_loss = np.mean(train_epoch_loss), np.mean(test_epoch_loss)
            ckpt_file = "./checkpoint/0/yolov3_test_loss=%.4f.ckpt" % test_epoch_loss
            log_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
            print("=> Epoch: %2d Time: %s Train loss: %.2f Test loss: %.2f Saving %s ..."
                  % (epoch, log_time, train_epoch_loss, test_epoch_loss, ckpt_file))
            self.saver.save(self.sess, ckpt_file, global_step=epoch)


if __name__ == '__main__':
    YoloTrain().train()
4.ckpt2pb.py
#! /usr/bin/env python
# coding=utf-8
import tensorflow as tf
from core.yolov3 import YOLOV3
# Freeze a training checkpoint into a single .pb graph file.
pb_file = "./checkpoint/0/test.pb"
ckpt_file = "./checkpoint/0/yolov3_test_loss=34.2846.ckpt-43"
# Nodes kept in the frozen graph: the input placeholder and three outputs.
output_node_names = ["input/input_data", "pred_sbbox/concat_2", "pred_mbbox/concat_2", "pred_lbbox/concat_2"]

# The placeholder lives under the 'input' name scope so that its node name
# is "input/input_data", matching output_node_names above.
with tf.name_scope('input'):
    input_data = tf.placeholder(dtype=tf.float32, name='input_data')

model = YOLOV3(input_data, trainable=False)
print(model.conv_sbbox, model.conv_mbbox, model.conv_lbbox)

# Use the session as a context manager so it is closed once the variables
# have been folded into constants.
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    saver = tf.train.Saver()
    saver.restore(sess, ckpt_file)
    converted_graph_def = tf.graph_util.convert_variables_to_constants(
        sess,
        input_graph_def=sess.graph.as_graph_def(),
        output_node_names=output_node_names)

with tf.gfile.GFile(pb_file, "wb") as f:
    f.write(converted_graph_def.SerializeToString())
5.image_demo.py
#! /usr/bin/env python
# coding=utf-8
import os
import cv2
import numpy as np
import core.utils as utils
import tensorflow as tf
from PIL import Image
# Author's note: the names in the pb graph are swapped - conv_sbbox holds the
# large boxes and conv_lbbox the small ones.
return_elements = ["input/input_data:0", "pred_sbbox/concat_2:0", "pred_mbbox/concat_2:0", "pred_lbbox/concat_2:0"]
# return_elements = ["input/input_data:0", "conv_sbbox/BiasAdd:0", "conv_mbbox/BiasAdd:0", "conv_lbbox/BiasAdd:0"]
pb_file = "./checkpoint/0/test.pb"
path = "data2/test/"
result_dir = "data/result/"
num_classes = 2
input_size = 416

# The frozen graph and the session do not depend on the image, so they are
# created once instead of once per file.
graph = tf.Graph()
return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)

# cv2.imwrite does not create missing folders; make sure the target exists.
if not os.path.isdir(result_dir):
    os.makedirs(result_dir)

with tf.Session(graph=graph) as sess:
    for file in os.listdir(path):
        image_path = path + file
        # image_path = "./data2/test/357.jpg"
        original_image = cv2.imread(image_path)
        if original_image is None:
            # imread yields None for files it cannot decode (non-images etc.).
            print("=> skip unreadable file: %s" % image_path)
            continue
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        original_image_size = original_image.shape[:2]
        # NOTE(review): utils.image_preporcess applies BGR2RGB again, so the
        # network input ends up back in BGR order here - confirm this matches
        # the channel order used during training.
        image_data = utils.image_preporcess(np.copy(original_image), [input_size, input_size])
        image_data = image_data[np.newaxis, ...]
        print(image_data.shape)

        pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
            [return_tensors[1], return_tensors[2], return_tensors[3]],
            feed_dict={return_tensors[0]: image_data})
        # np.save("data/test/conv_sbbox.npy", pred_sbbox)
        # np.save("data/test/conv_mbbox.npy", pred_mbbox)
        # np.save("data/test/conv_lbbox.npy", pred_lbbox)
        print(pred_sbbox.shape, pred_mbbox.shape, pred_lbbox.shape)

        # Flatten the three scales into one (N, 5 + num_classes) array.
        pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                                    np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                                    np.reshape(pred_lbbox, (-1, 5 + num_classes))], axis=0)
        bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, (input_size, input_size), 0.3)
        bboxes = utils.nms(bboxes, 0.45, method='nms')
        print("bboxes:", bboxes, len(bboxes))

        image = utils.draw_bbox(original_image, bboxes)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imshow("img", image)
        cv2.waitKey(0)  # blocks until a key is pressed for every image
        save_path = result_dir + file
        cv2.imwrite(save_path, image)
6.core/utils.py
#! /usr/bin/env python
# coding=utf-8
#================================================================
# Copyright (C) 2019 * Ltd. All rights reserved.
#
# Editor : VIM
# File name : utils.py
# Author : YunYang1994
# Created date: 2019-02-28 13:14:19
# Description :
#
#================================================================
import cv2
import random
import colorsys
import numpy as np
import tensorflow as tf
from core.config import cfg
def read_class_names(class_file_name):
    """Read a class-name file and return ``{line_index: name}``.

    Each line of the file is one class; the newline characters at the ends of
    the line are removed and the zero-based line number becomes the key.
    """
    with open(class_file_name, 'r') as data:
        return {line_no: line.strip('\n') for line_no, line in enumerate(data)}
def get_anchors(anchors_path):
    """Load anchors from the first line of a comma-separated text file.

    The values on that line are parsed as float32 and returned reshaped to
    ``(3, 3, 2)``, so the line must contain exactly 18 numbers.
    """
    with open(anchors_path) as anchor_file:
        first_line = anchor_file.readline()
    values = np.array(first_line.split(','), dtype=np.float32)
    return values.reshape(3, 3, 2)
def image_preporcess(image, target_size, gt_boxes=None):
    """Letterbox ``image`` into ``target_size`` and divide pixel values by 255.

    The image is first passed through ``cv2.COLOR_BGR2RGB``, resized with its
    aspect ratio preserved, and centred on a canvas filled with 128.

    :param image: H x W x 3 image array.
    :param target_size: ``(height, width)`` of the output canvas.
    :param gt_boxes: optional box array whose columns 0/2 are x and 1/3 are y
        coordinates; it is rescaled and shifted IN PLACE to the new canvas.
    :return: the padded image, or ``(padded image, gt_boxes)`` when boxes
        were supplied.
    """
    converted = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
    target_h, target_w = target_size
    src_h, src_w, _ = converted.shape

    # One common factor for both axes keeps the aspect ratio intact.
    ratio = min(target_w / src_w, target_h / src_h)
    new_w, new_h = int(ratio * src_w), int(ratio * src_h)
    resized = cv2.resize(converted, (new_w, new_h))

    # Centre the resized image on a grey (128) canvas.
    canvas = np.full(shape=[target_h, target_w, 3], fill_value=128.0)
    pad_x, pad_y = (target_w - new_w) // 2, (target_h - new_h) // 2
    canvas[pad_y:new_h + pad_y, pad_x:new_w + pad_x, :] = resized
    canvas = canvas / 255.

    if gt_boxes is None:
        return canvas

    gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * ratio + pad_x
    gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * ratio + pad_y
    return canvas, gt_boxes
def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES), show_label=True):
    """Draw detection boxes (and optionally labels) onto ``image`` in place.

    :param image: H x W x 3 image array; it is modified and also returned.
    :param bboxes: iterable of ``[x_min, y_min, x_max, y_max, probability, cls_id]``.
    :param classes: ``{class_id: name}``; the default is read from
        ``cfg.YOLO.CLASSES`` once, when this function is defined.
    :param show_label: when true, a filled tag with "name: score" is drawn
        at the top-left corner of every box.
    """
    num_classes = len(classes)
    image_h, image_w, _ = image.shape

    # One colour per class, evenly spaced in hue, then shuffled with a fixed
    # seed so the assignment is the same on every call.
    palette = []
    for idx in range(num_classes):
        red, green, blue = colorsys.hsv_to_rgb(1.0 * idx / num_classes, 1., 1.)
        palette.append((int(red * 255), int(green * 255), int(blue * 255)))
    random.seed(0)
    random.shuffle(palette)
    random.seed(None)  # re-seed the global RNG afterwards

    font_scale = 0.5
    line_width = int(0.6 * (image_h + image_w) / 600)

    for bbox in bboxes:
        corners = np.array(bbox[:4], dtype=np.int32)
        score = bbox[4]
        class_ind = int(bbox[5])
        colour = palette[class_ind]
        top_left = (corners[0], corners[1])
        bottom_right = (corners[2], corners[3])
        cv2.rectangle(image, top_left, bottom_right, colour, line_width)

        if not show_label:
            continue
        bbox_mess = '%s: %.2f' % (classes[class_ind], score)
        text_size = cv2.getTextSize(bbox_mess, 0, font_scale, thickness=line_width // 2)[0]
        tag_corner = (top_left[0] + text_size[0], top_left[1] - text_size[1] - 3)
        cv2.rectangle(image, top_left, tag_corner, colour, -1)  # filled
        cv2.putText(image, bbox_mess, (top_left[0], top_left[1] - 2), cv2.FONT_HERSHEY_SIMPLEX,
                    font_scale, (0, 0, 0), line_width // 2, lineType=cv2.LINE_AA)
    return image
def bboxes_iou(boxes1, boxes2):
    """Return the IoU of ``boxes1`` and ``boxes2`` (broadcast over leading axes).

    Boxes are ``(xmin, ymin, xmax, ymax)`` along the last axis. The result is
    floored at float32 machine epsilon, so it is never exactly zero.
    A pair whose union area is not positive (e.g. two zero-area boxes) gets
    that floor value instead of the NaN that 0/0 would produce.
    """
    boxes1 = np.array(boxes1)
    boxes2 = np.array(boxes2)

    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    # Intersection rectangle; negative extents mean "no overlap" and clamp to 0.
    left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
    inter_section = np.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area

    # Divide only where the union is positive; elsewhere use 0 so the
    # epsilon floor below applies.
    has_union = union_area > 0
    safe_union = np.where(has_union, union_area, 1.0)
    ratio = np.where(has_union, 1.0 * inter_area / safe_union, 0.0)
    return np.maximum(ratio, np.finfo(np.float32).eps)
def read_pb_return_tensors(graph, pb_file, return_elements):
    """Import a frozen ``.pb`` graph into ``graph`` and return selected tensors.

    :param graph: the ``tf.Graph`` that receives the imported nodes.
    :param pb_file: path of the serialized ``GraphDef``.
    :param return_elements: names of the ops/tensors to hand back.
    :return: whatever ``tf.import_graph_def`` returns for ``return_elements``.
    """
    # tf.gfile.GFile replaces the deprecated FastGFile alias.
    with tf.gfile.GFile(pb_file, 'rb') as f:
        frozen_graph_def = tf.GraphDef()
        frozen_graph_def.ParseFromString(f.read())

    with graph.as_default():
        return_elements = tf.import_graph_def(frozen_graph_def,
                                              return_elements=return_elements)
    return return_elements
# Remove the duplicate boxes that were predicted for one and the same object.
def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
    """Per-class (soft-)non-maximum suppression.

    :param bboxes: array of rows (xmin, ymin, xmax, ymax, score, class)
    :param iou_threshold: with ``method='nms'``, boxes overlapping the
        current best box by more than this are dropped.
    :param sigma: with ``method='soft-nms'``, scores are multiplied by
        ``exp(-iou**2 / sigma)`` instead of being zeroed.
    :return: list of the surviving box rows.

    Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
          https://github.com/bharatsingh430/soft-nms
    """
    kept = []
    for cls in list(set(bboxes[:, 5])):
        candidates = bboxes[bboxes[:, 5] == cls]

        while len(candidates) > 0:
            # Take the highest-scoring remaining box of this class ...
            top = np.argmax(candidates[:, 4])
            winner = candidates[top]
            kept.append(winner)
            candidates = np.concatenate([candidates[:top], candidates[top + 1:]])

            # ... and down-weight the others according to their overlap.
            overlap = bboxes_iou(winner[np.newaxis, :4], candidates[:, :4])
            decay = np.ones((len(overlap),), dtype=np.float32)

            assert method in ['nms', 'soft-nms']
            if method == 'nms':
                decay[overlap > iou_threshold] = 0.0
            if method == 'soft-nms':
                decay = np.exp(-(1.0 * overlap ** 2 / sigma))

            candidates[:, 4] = candidates[:, 4] * decay
            candidates = candidates[candidates[:, 4] > 0.]
    return kept
# e.g. (10647, 25) -> (10647, 6): the per-class probabilities collapse to one best class
def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
    """Map decoded predictions back to the original image and filter them.

    :param pred_bbox: (N, 5 + num_classes) rows of
        ``[x, y, w, h, confidence, class probabilities...]`` in network-input
        pixel coordinates.
    :param org_img_shape: ``(height, width)`` of the original image.
    :param input_size: ``(height, width)`` of the network input - the same
        convention as ``target_size`` in ``image_preporcess``, so the
        letterbox applied there is undone correctly for non-square inputs too.
    :param score_threshold: minimum ``confidence * best class probability``.
    :return: (M, 6) array of ``[xmin, ymin, xmax, ymax, score, class]``.
    """
    valid_scale = [0, np.inf]
    pred_bbox = np.array(pred_bbox)

    pred_xywh = pred_bbox[:, 0:4]  # x, y, w, h
    pred_conf = pred_bbox[:, 4]    # objectness confidence
    pred_prob = pred_bbox[:, 5:]   # per-class probabilities

    # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax); from here on pred_coor
    #     holds the top-left and bottom-right corners.
    pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5,
                                pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1)

    # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org)
    org_h, org_w = org_img_shape
    input_h, input_w = input_size
    resize_ratio = min(input_w / org_w, input_h / org_h)
    dw = (input_w - resize_ratio * org_w) / 2
    dh = (input_h - resize_ratio * org_h) / 2
    pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
    pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio

    # (3) clip boxes that reach outside the image; boxes that end up with
    #     min > max are zeroed so step (4) discards them.
    pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]),
                                np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1)
    invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3]))
    pred_coor[invalid_mask] = 0

    # (4) discard boxes whose sqrt(area) lies outside valid_scale
    bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
    scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1]))

    # (5) discard boxes with low scores
    classes = np.argmax(pred_prob, axis=-1)
    scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
    score_mask = scores > score_threshold

    mask = np.logical_and(scale_mask, score_mask)
    coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]
    return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)
三、后处理
由于在冻结pb的时候,输出节点冻结在__build_nework模块了,需要自己再写decode模块,这里用numpy代替tensorflow复现了decode模块。
备注:这里是尝试阶段的代码,真正代码已写到core/utils.py。
1.tranform.py
import cv2
import random
import numpy as np
import tensorflow as tf
import post_process as post_process
from PIL import Image
import math
from numpy import *
# 1. read the image with OpenCV, convert BGR to RGB
# 2. pre-processing: resize / pad / normalise
# 3. run the prediction - pred_sbbox / pred_mbbox / pred_lbbox
# 4. merge: concat / post-processing / nms
# # convolution stages
# x = DarknetConv2D_BN_Leaky(32, (3, 3))(x) # ('same' convolution built in) output x -> 416*416*32
# x = resblock_body(x, 64, 1) # num_filters = 64, num_blocks = 1 (repetitions), result 208*208*64
# x = resblock_body(x, 128, 2) # result 104*104*128
# x = resblock_body(x, 256, 8) # result 52*52*256
# x = resblock_body(x, 512, 8) # result 26*26*512
# x = resblock_body(x, 1024, 4) # result 13*13*1024; five repeated resblock_body() units
class_name = "./data/classes/coco.names"
image_path = "./data/b.jpg"
img = cv2.imread(image_path)
if img is None:
    # imread signals failure by returning None rather than raising.
    raise IOError("cannot read image: %s" % image_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_size = img.shape[:2]
num_class = 80
input_size = (416, 416)  # adjustable
score_threshold = 0.3
iou_threshold = 0.45

# Raw convolution outputs of the three scales, saved earlier as .npy files.
conv_sbbox = np.load("data/test/conv_sbbox.npy")
conv_mbbox = np.load("data/test/conv_mbbox.npy")
conv_lbbox = np.load("data/test/conv_lbbox.npy")
print(conv_sbbox.shape, conv_mbbox.shape, conv_lbbox.shape)

bboxes = post_process.function(conv_sbbox, conv_mbbox, conv_lbbox, num_class, img_size, input_size,
                               score_threshold, iou_threshold)
print(bboxes)
print("-------------post process result-------------")
print("result_num:", len(bboxes))
for index, result in enumerate(bboxes):
    # Rows are (xmin, ymin, xmax, ymax, score, class) - corner coordinates,
    # not centre/size - so they are labelled as such in the output.
    xmin, ymin, xmax, ymax = result[:4]
    detection_scores = result[4]
    detection_classes = result[5]
    print("result_{}={}".format(index, result))
    print("result: xmin,ymin,xmax,ymax={}, detection_scores={}, detection_classes={}".format(
        (xmin, ymin, xmax, ymax), detection_scores, detection_classes))

image = post_process.draw_bbox(img, bboxes, class_name)
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
cv2.imshow("img", image)
cv2.waitKey(0)
2.post_process.py
import cv2
import random
import colorsys
import numpy as np
# Anchors and strides, one entry per detection scale.
# `anchors` has shape (3 scales, 3 anchors, 2); decode() multiplies each pair
# by exp(raw w/h) and by the stride of its scale.
anchors = np.array(
    [
        [[1.25, 1.625], [2., 3.75], [4.125, 2.875]],
        [[1.875, 3.8125], [3.875, 2.8125], [3.6875, 7.4375]],
        [[3.625, 2.8125], [4.875, 6.1875], [11.65625, 10.1875]],
    ],
    dtype=np.float32,
)
strides = [8, 16, 32]
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``."""
    denominator = 1 + np.exp(-x)
    return 1.0 / denominator
def decode(conv_output, anchors, stride, num_class):
    """Turn one scale's raw convolution output into box predictions.

    :param conv_output: array that reshapes to
        ``(batch, size, size, len(anchors), 5 + num_class)``; the feature map
        is taken to be square (only ``shape[1]`` is read for both axes).
    :param anchors: the anchors of this scale, one (w, h) pair per anchor.
    :param stride: multiplier from grid cells to input pixels.
    :param num_class: number of classes.
    :return: array of the same 5-D shape holding
        ``[x, y, w, h, confidence, class probabilities...]``.
    """
    batch_size = conv_output.shape[0]
    grid_size = conv_output.shape[1]
    num_anchors = len(anchors)

    cells = np.reshape(conv_output, (batch_size, grid_size, grid_size, num_anchors, 5 + num_class))
    raw_xy = cells[..., 0:2]
    raw_wh = cells[..., 2:4]
    raw_conf = cells[..., 4:5]
    raw_prob = cells[..., 5:]

    # Cell offsets: grid[row, col] == (col, row), i.e. (x, y) of each cell.
    cols, rows = np.meshgrid(np.arange(grid_size, dtype=int), np.arange(grid_size, dtype=int))
    grid = np.stack([cols, rows], axis=-1).astype(np.float32)
    # Add batch and anchor axes; broadcasting repeats the grid over both.
    grid = grid[np.newaxis, :, :, np.newaxis, :]

    box_xy = (sigmoid(raw_xy) + grid) * stride        # centre, in input pixels
    box_wh = (np.exp(raw_wh) * anchors) * stride      # size, in input pixels
    confidence = sigmoid(raw_conf)
    class_prob = sigmoid(raw_prob)

    return np.concatenate([box_xy, box_wh, confidence, class_prob], axis=-1)
def bboxes_iou(boxes1, boxes2):
    """Compute intersection-over-union for ``(xmin, ymin, xmax, ymax)`` boxes.

    The two inputs broadcast against each other over their leading axes.
    Results are clamped from below at float32 epsilon. When the union area is
    not positive (degenerate, zero-area boxes) that lower bound is returned
    rather than the NaN a 0/0 division would give.
    """
    boxes1 = np.array(boxes1)
    boxes2 = np.array(boxes2)

    area1 = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    area2 = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    # Overlap rectangle: max of the top-left corners, min of the bottom-right
    # corners; a negative width/height means the boxes do not intersect.
    top_left = np.maximum(boxes1[..., :2], boxes2[..., :2])
    bottom_right = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
    extent = np.maximum(bottom_right - top_left, 0.0)
    inter_area = extent[..., 0] * extent[..., 1]
    union_area = area1 + area2 - inter_area

    # Guard the division: only divide where there is a positive union.
    valid = union_area > 0
    denominator = np.where(valid, union_area, 1.0)
    ious = np.where(valid, 1.0 * inter_area / denominator, 0.0)
    return np.maximum(ious, np.finfo(np.float32).eps)
# Suppress the redundant boxes predicted for a single object.
def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
    """Class-wise non-maximum suppression (hard or soft).

    :param bboxes: (xmin, ymin, xmax, ymax, score, class)
    :param iou_threshold: overlap above which a box is removed (``'nms'``).
    :param sigma: Gaussian width of the score decay (``'soft-nms'``).
    :return: list with the selected box rows, grouped by class.

    Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
          https://github.com/bharatsingh430/soft-nms
    """
    selected = []
    class_ids = list(set(bboxes[:, 5]))
    for class_id in class_ids:
        pool = bboxes[(bboxes[:, 5] == class_id)]
        while len(pool) > 0:
            best_idx = np.argmax(pool[:, 4])
            best = pool[best_idx]
            selected.append(best)
            # Everything except the box that was just selected.
            pool = np.concatenate([pool[:best_idx], pool[best_idx + 1:]])
            ious = bboxes_iou(best[np.newaxis, :4], pool[:, :4])
            factor = np.ones((len(ious),), dtype=np.float32)
            assert method in ['nms', 'soft-nms']
            if method == 'nms':
                suppressed = ious > iou_threshold
                factor[suppressed] = 0.0
            if method == 'soft-nms':
                factor = np.exp(-(1.0 * ious ** 2 / sigma))
            # Boxes whose score dropped to zero leave the pool.
            pool[:, 4] = pool[:, 4] * factor
            alive = pool[:, 4] > 0.
            pool = pool[alive]
    return selected
# e.g. (10647, 25) -> (10647, 6): all class probabilities reduce to one class id
def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
    """Convert decoded predictions to boxes on the original image and filter.

    :param pred_bbox: (N, 5 + num_classes) rows
        ``[x, y, w, h, confidence, class probabilities...]`` expressed in
        network-input pixels.
    :param org_img_shape: ``(height, width)`` of the original image.
    :param input_size: ``(height, width)`` of the network input. This is the
        same order as ``org_img_shape`` and as ``target_size`` of
        ``image_preporcess`` in core/utils.py; square inputs are unaffected
        by the order.
    :param score_threshold: boxes whose ``confidence * best class
        probability`` is not above this value are dropped.
    :return: (M, 6) array ``[xmin, ymin, xmax, ymax, score, class]``.
    """
    valid_scale = [0, np.inf]
    pred_bbox = np.array(pred_bbox)

    pred_xywh = pred_bbox[:, 0:4]  # columns 0-3: x, y, w, h
    pred_conf = pred_bbox[:, 4]    # column 4: confidence
    pred_prob = pred_bbox[:, 5:]   # remaining columns: class probabilities

    # (1) centre/size -> corners (xmin, ymin, xmax, ymax)
    half_wh = pred_xywh[:, 2:] * 0.5
    pred_coor = np.concatenate([pred_xywh[:, :2] - half_wh,
                                pred_xywh[:, :2] + half_wh], axis=-1)

    # (2) undo the letterbox: remove the padding, then the resize
    org_h, org_w = org_img_shape
    input_h, input_w = input_size
    resize_ratio = min(input_w / org_w, input_h / org_h)
    dw = (input_w - resize_ratio * org_w) / 2
    dh = (input_h - resize_ratio * org_h) / 2
    pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
    pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio

    # (3) clip to the image; rows that become inverted are set to 0
    pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]),
                                np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1)
    invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3]))
    pred_coor[invalid_mask] = 0

    # (4) keep only boxes whose sqrt(area) is inside valid_scale
    bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
    scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1]))

    # (5) keep only boxes scoring above the threshold
    classes = np.argmax(pred_prob, axis=-1)
    scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
    score_mask = scores > score_threshold

    mask = np.logical_and(scale_mask, score_mask)
    coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]
    return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)
# Read the class names of the dataset.
def read_class_names(class_file_name):
    """Return ``{index: class name}`` built from a one-name-per-line file.

    The index is the zero-based line number; newline characters are stripped
    from both ends of every line.
    """
    with open(class_file_name, 'r') as data:
        lines = list(data)
    return dict((idx, text.strip('\n')) for idx, text in enumerate(lines))
# Draw the detection boxes on the image.
def draw_bbox(image, bboxes, class_name, show_label=True):
    """Draw boxes and optional "name: score" labels onto ``image`` in place.

    :param image: H x W x 3 image array; modified and returned.
    :param bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
    :param class_name: path of the class-name file; it is read on every call
        through ``read_class_names``.
    :param show_label: draw the text tag above each box when true.
    """
    classes = read_class_names(class_name)
    num_classes = len(classes)
    image_h, image_w, _ = image.shape

    # Evenly spaced hues, converted to 0-255 RGB triples.
    colors = []
    for k in range(num_classes):
        rgb = colorsys.hsv_to_rgb(1.0 * k / num_classes, 1., 1.)
        colors.append((int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)))
    # Deterministic shuffle; the global RNG is re-seeded afterwards.
    random.seed(0)
    random.shuffle(colors)
    random.seed(None)

    text_scale = 0.5
    thickness = int(0.6 * (image_h + image_w) / 600)

    for box in bboxes:
        xyxy = np.array(box[:4], dtype=np.int32)
        score = box[4]
        class_ind = int(box[5])
        box_color = colors[class_ind]
        pt_min, pt_max = (xyxy[0], xyxy[1]), (xyxy[2], xyxy[3])
        cv2.rectangle(image, pt_min, pt_max, box_color, thickness)

        if show_label:
            bbox_mess = '%s: %.2f' % (classes[class_ind], score)
            label_size = cv2.getTextSize(bbox_mess, 0, text_scale, thickness=thickness // 2)[0]
            # Filled background for the label, anchored at the top-left corner.
            cv2.rectangle(image, pt_min,
                          (pt_min[0] + label_size[0], pt_min[1] - label_size[1] - 3),
                          box_color, -1)
            cv2.putText(image, bbox_mess, (pt_min[0], pt_min[1] - 2), cv2.FONT_HERSHEY_SIMPLEX,
                        text_scale, (0, 0, 0), thickness // 2, lineType=cv2.LINE_AA)
    return image
def function(conv_sbbox,conv_mbbox,conv_lbbox, num_class,img_size,input_size,score_threshold,iou_threshold):
    """Full numpy post-processing: decode the three scales, filter, run NMS.

    :param conv_sbbox, conv_mbbox, conv_lbbox: raw convolution outputs,
        decoded with the module-level ``anchors``/``strides`` entries 0, 1, 2.
    :param num_class: number of classes.
    :param img_size: shape of the original image, passed to ``postprocess_boxes``.
    :param input_size: network input size, passed to ``postprocess_boxes``.
    :param score_threshold: score cut-off for ``postprocess_boxes``.
    :param iou_threshold: IoU cut-off for ``nms``.
    :return: the list of boxes produced by ``nms``
        (4 coordinates + 1 score + 1 class each).

    Typical use by a caller: read the image, convert BGR -> RGB, take
    ``img.shape[:2]`` as ``img_size``, call this function, then
    ``draw_bbox`` and display the result.
    """
    raw_outputs = (conv_sbbox, conv_mbbox, conv_lbbox)
    print("input_shape---", conv_sbbox.shape, conv_mbbox.shape, conv_lbbox.shape)

    # Decode every scale with its own anchors and stride.
    decoded = [decode(raw, anchors[scale], strides[scale], num_class)
               for scale, raw in enumerate(raw_outputs)]
    print("decode_shape---", decoded[0].shape, decoded[1].shape, decoded[2].shape)

    # Flatten each scale to (-1, 5 + num_class) and stack them.
    flat = [np.reshape(item, (-1, 5 + num_class)) for item in decoded]
    pred_bbox = np.concatenate(flat, axis=0)
    print(flat[0].shape, flat[1].shape, flat[2].shape, pred_bbox.shape)

    # 4 coordinates + 1 confidence + 1 class per row
    candidates = postprocess_boxes(pred_bbox, img_size, input_size, score_threshold)
    return nms(candidates, iou_threshold, method='nms')