# -- encoding:utf-8 --
import config as cfg
from data_utils import FlowerDataLoader
from data_utils import nms, check_directory, fetch_selective_search_images, calc_ground_truth_predict_bounding_box
from networks import AlexNet, SVMModel, RegressionNet
import collections
import os
import pickle
import numpy as np
import cv2 as cv
import tensorflow as tf
import time
class SolverType(object):
    """Enumeration-style constants selecting which pipeline stage a Solver runs."""
    # Fine-tune training of the AlexNet backbone.
    TRAIN_FINE_TUNE_MODEL = 0
    # Extract the high-level features used to train the per-class SVMs.
    GENERATE_TRAIN_SVM_FEATURES = 1
    # Train the per-class SVM models.
    TRAIN_SVM_MODEL = 2
    # Extract the high-level features used to train the box-regression model.
    GENERATE_TRAIN_REGRESSION_FEATURES = 3
    # Train the bounding-box regression model.
    TRAIN_REGRESSION_MODEL = 4
    # The whole prediction pipeline end to end.
    PREDICT_BOUNDING_BOX = 5
    # Prediction step 1: fetch the Selective Search candidate boxes.
    PREDICT_BOUNDING_BOX_STEP1 = 6
    # Prediction step 2: fetch the high-level features of the boxes.
    PREDICT_BOUNDING_BOX_STEP2 = 7
    # Prediction step 3: fetch the SVM classification results.
    PREDICT_BOUNDING_BOX_STEP3 = 8
    # Prediction step 4: fetch the regression (box-offset) results.
    PREDICT_BOUNDING_BOX_STEP4 = 9
class Solver(object):
    """Builds and runs one stage of the R-CNN training/prediction pipeline.

    Depending on ``solver_type`` the constructor builds the matching network
    (AlexNet fine-tuning, per-class SVMs, or the bounding-box regression net),
    creates/restores a TensorFlow session where needed, and binds ``self.run``
    to the method that implements that stage, so callers simply invoke
    ``solver.run(...)``.
    """
    def __init__(self, solver_type):
        # Input attributes / mode flags.
        self.is_training = False
        self.is_svm = False
        self.is_regression = False
        if SolverType.TRAIN_FINE_TUNE_MODEL == solver_type:
            with tf.Graph().as_default():
                print("进行Fine Tune模型训练操作....")
                self.is_training = True
                # 1. Instantiate AlexNet, the class that builds the model graph.
                self.net = AlexNet(alexnet_mat_file_path=cfg.ALEX_NET_MAT_FILE_PATH,
                                   is_training=self.is_training)
                self.data_loader = FlowerDataLoader()
                # 2. Load the related configuration values.
                self.__set_fine_tune_config()
                # Check (and create) the output directories.
                check_directory(self.summary_writer_log_dir)
                check_directory(self.checkpoint_dir)
                # 3. Build the global step variable.
                self.__get_or_create_global_step()
                # 4. Build the training optimizer.
                self.__create_tf_train_op()
                # 5. Build the persistence (Saver) object.
                self.__create_tf_saver()
                # 6. Build the visualization (summary) objects.
                self.__create_tf_summary()
                # 7. Build the tf.Session and initialize variables or restore the model.
                self.__create_tf_session_and_initial()
                # 8. Bind the run method (executes this stage).
                self.run = self.__fine_tune_train
        elif SolverType.GENERATE_TRAIN_SVM_FEATURES == solver_type:
            with tf.Graph().as_default():
                print("生成SVM分类训练用高阶特征属性,并持久化磁盘文件....")
                self.is_training = False
                self.is_svm = True
                self.net = AlexNet(alexnet_mat_file_path=cfg.ALEX_NET_MAT_FILE_PATH,
                                   is_training=self.is_training, is_svm=self.is_svm)
                self.data_loader = FlowerDataLoader()
                # Load the related configuration values.
                self.__set_fine_tune_config()
                # Check the output directories (the fine-tune checkpoint must already exist).
                check_directory(self.summary_writer_log_dir)
                check_directory(self.checkpoint_dir, created=False, error=True)
                # Build the global step variable.
                self.__get_or_create_global_step()
                # Build the persistence (Saver) object.
                self.__create_tf_saver()
                # Build the visualization (summary) objects.
                self.__create_tf_summary()
                # Build the session and initialize/restore variables.
                self.__create_tf_session_and_initial()
                # Bind the run method.
                self.run = self.__persistent_svm_higher_features
        elif SolverType.TRAIN_SVM_MODEL == solver_type:
            print("进行SVM模型训练操作....")
            self.is_svm = True
            self.is_training = True
            self.net = SVMModel(is_training=self.is_training)
            # Bind the run method (no TF session needed for the SVMs).
            self.run = self.__svm_train
        elif SolverType.GENERATE_TRAIN_REGRESSION_FEATURES == solver_type:
            with tf.Graph().as_default():
                print("生成回归模型训练用高阶特征属性,并持久化磁盘文件....")
                self.is_training = False
                self.is_svm = False
                self.is_regression = True
                self.net = AlexNet(alexnet_mat_file_path=cfg.ALEX_NET_MAT_FILE_PATH,
                                   is_training=self.is_training, is_svm=self.is_svm, is_regression=self.is_regression)
                self.data_loader = FlowerDataLoader()
                # Load the related configuration values.
                self.__set_fine_tune_config()
                # Check the output directories (the fine-tune checkpoint must already exist).
                check_directory(self.summary_writer_log_dir)
                check_directory(self.checkpoint_dir, created=False, error=True)
                # Build the global step variable.
                self.__get_or_create_global_step()
                # Build the persistence (Saver) object.
                self.__create_tf_saver()
                # Build the visualization (summary) objects.
                self.__create_tf_summary()
                # Build the session and initialize/restore variables.
                self.__create_tf_session_and_initial()
                # Bind the run method.
                # NOTE(review): only the "_new" variant is ever bound, so
                # __persistent_regression_higher_features below is dead code.
                self.run = self.__persistent_regression_higher_features_new
        elif SolverType.TRAIN_REGRESSION_MODEL == solver_type:
            with tf.Graph().as_default():
                print("进行回归模型训练..")
                self.is_regression = True
                self.is_training = True
                self.net = RegressionNet(is_training=self.is_training)
                self.data_loader = FlowerDataLoader()
                # Load the related configuration values.
                self.__set_regression_net_config()
                # Check (and create) the output directories.
                check_directory(self.summary_writer_log_dir)
                check_directory(self.checkpoint_dir)
                # Build the global step variable.
                self.__get_or_create_global_step()
                # Build the training optimizer.
                self.__create_tf_train_op()
                # Build the persistence (Saver) object.
                self.__create_tf_saver()
                # Build the visualization (summary) objects.
                self.__create_tf_summary()
                # Build the session and initialize/restore variables.
                self.__create_tf_session_and_initial()
                # Bind the run method.
                self.run = self.__regression_train
        elif SolverType.PREDICT_BOUNDING_BOX_STEP1 == solver_type:
            print("产生Selective Search候选框对应的图像信息....")
            self.run = self.__fetch_selective_search_box
        elif SolverType.PREDICT_BOUNDING_BOX_STEP2 == solver_type:
            with tf.Graph().as_default():
                print("进行获取SVM分类以及Regression Box高阶特征操作....")
                self.is_training = False
                self.is_svm = True
                self.is_regression = True
                self.net = AlexNet(alexnet_mat_file_path=cfg.ALEX_NET_MAT_FILE_PATH,
                                   is_training=self.is_training, is_svm=self.is_svm, is_regression=self.is_regression)
                # Load the related configuration values.
                self.__set_fine_tune_config()
                # Check the output directories (the fine-tune checkpoint must already exist).
                check_directory(self.summary_writer_log_dir)
                check_directory(self.checkpoint_dir, created=False, error=True)
                # Build the global step variable.
                self.__get_or_create_global_step()
                # Build the persistence (Saver) object.
                self.__create_tf_saver()
                # Build the visualization (summary) objects.
                self.__create_tf_summary()
                # Build the session and initialize/restore variables.
                self.__create_tf_session_and_initial()
                # Bind the run method.
                self.run = self.__fetch_higher_features_by_images
        elif SolverType.PREDICT_BOUNDING_BOX_STEP3 == solver_type:
            print("进行SVM模型预测操作,得到各个边框属于某个类别的预测值!!!")
            self.net = SVMModel(is_training=False)
            self.run = self.__fetch_svm_predict
        elif SolverType.PREDICT_BOUNDING_BOX_STEP4 == solver_type:
            with tf.Graph().as_default():
                print("进行Bounding Box回归预测操作,得到各个边框的偏移量信息....")
                self.is_training = False
                self.is_svm = False
                self.is_regression = True
                self.net = RegressionNet(is_training=self.is_training)
                # Load the related configuration values.
                self.__set_regression_net_config()
                # Check the output directories (the regression checkpoint must already exist).
                check_directory(self.summary_writer_log_dir)
                check_directory(self.checkpoint_dir, created=False, error=True)
                # Build the global step variable.
                self.__get_or_create_global_step()
                # Build the persistence (Saver) object.
                self.__create_tf_saver()
                # Build the visualization (summary) objects.
                self.__create_tf_summary()
                # Build the session and initialize/restore variables.
                self.__create_tf_session_and_initial()
                # Bind the run method.
                self.run = self.__fetch_regression_bounding_box
        else:
            raise Exception("不支持当前solver_stype:{}".format(solver_type))
    def __fetch_regression_bounding_box(self, x):
        """
        Return the offset values predicted by the regression model.
        :param x: high-level features of the boxes to regress, one row per box.
        :return: predicted offsets produced by the regression network.
        """
        return self.session.run(self.net.logits, feed_dict={self.net.input_data: x})
    def __fetch_svm_predict(self, x):
        """
        Run the per-class SVM models on the high-level features ``x`` and return,
        per candidate box, the probability of it belonging to each class.
        ``x`` holds the features of every candidate box of a single image.
        :param x: high-level feature matrix, one row per candidate box.
        :return: ndarray whose rows are [box index, class label, probability].
        """
        result = []
        # 1. Find out which class-specific models exist.
        labels = self.net.fetch_labels()
        # 2. Predict with each label's model (over all candidate boxes of the image).
        for label in labels:
            # a. Probability/confidence of every box belonging to the current class.
            predict = self.net.predict_proba(x, label)
            # b. From the boxes and confidences, pick the final boxes of this
            #    class for the image.
            """
            实现方式:
            方法2. 简单(定位任务的思路):获取概率最大的那个作为最终的预测边框
            方法1. 使用NMS非极大值抑制的策略来获取确定边框
            """
            if cfg.SVM_PREDICT_PROBABILITY_WITH_NMS:
                # Method 1: keep every box above the threshold together with its
                # probability (the NMS itself happens later, not here).
                # a. Indices of all probabilities above the threshold.
                index = np.where(predict >= cfg.SVM_PREDICT_PROBABILITY_THRESHOLD)[0]
                # b. Append each kept index.
                if np.size(index) > 0:
                    for idx in index:
                        result.append([idx, label, predict[idx]])
            else:
                # Method 2: no NMS; keep only the single highest-probability box.
                # Maximum value and its index.
                max_predict_index = np.argmax(predict)
                max_predict = predict[max_predict_index]
                # Filter by the probability threshold.
                if max_predict > cfg.SVM_PREDICT_PROBABILITY_THRESHOLD:
                    # This box is considered to contain an object of this class.
                    # Row format: [box index, class label, probability].
                    result.append([max_predict_index, label, max_predict])
        # 3. Convert to ndarray.
        result = np.asarray(result)
        return result
    def __fetch_higher_features_by_images(self, images):
        """
        Return the high-level features of the given images: the outputs of the
        FC7 and Conv5 layers, used by the SVMs and the regression model
        respectively.
        :param images: batch of candidate-box image crops.
        :return: tuple (svm_higher_features, regression_higher_features).
        """
        svm_higher_features, regression_higher_features = self.session.run(
            [self.net.svm_logits, self.net.regression_logits],
            feed_dict={self.net.input_data: images}
        )
        return svm_higher_features, regression_higher_features
    def __fetch_selective_search_box(self, file_path):
        """
        Run Selective Search on the image at ``file_path`` and return two values:
        the cropped box images and the box coordinates.
        :param file_path: path of the image to process.
        :return: tuple (images, bounding_boxs) as ndarrays.
        """
        # 1. Fetch the candidate-box data.
        result = fetch_selective_search_images(file_path=file_path,
                                               image_width=cfg.IMAGE_WIDTH,
                                               image_height=cfg.IMAGE_HEIGHT)
        # 2. Split the (image, box) pairs into two lists.
        images = []
        bounding_boxs = []
        if result is not None:
            for image, box in result:
                images.append(image)
                bounding_boxs.append(box)
        # 3. Convert to ndarrays.
        images = np.asarray(images)
        bounding_boxs = np.asarray(bounding_boxs)
        return images, bounding_boxs
    def __set_regression_net_config(self):
        """
        Load the configuration values related to the RegressionNet.
        :return:
        """
        self.initial_learning_rate = cfg.REGRESSION_INITIAL_LEARNING_RATE
        self.decay_steps = cfg.REGRESSION_DECAY_STEPS
        self.decay_rate = cfg.REGRESSION_DECAY_RATE
        self.summary_writer_log_dir = cfg.REGRESSION_SUMMARY_WRITER_LOG_DIR
        self.checkpoint_dir = cfg.REGRESSION_CHECKPOINT_DIR
        self.checkpoint_path = os.path.join(self.checkpoint_dir, cfg.REGRESSION_CHECKPOINT_FILENAME)
        self.max_steps = cfg.REGRESSION_MAX_STEP
        self.summary_step = cfg.REGRESSION_SUMMARY_STEP
        self.checkpoint_step = cfg.REGRESSION_CHECKPOINT_STEP
    def __set_fine_tune_config(self):
        """
        Load the configuration values related to the fine-tuned AlexNet.
        :return:
        """
        self.initial_learning_rate = cfg.FINE_TUNE_INITIAL_LEARNING_RATE
        self.decay_steps = cfg.FINE_TUNE_DECAY_STEPS
        self.decay_rate = cfg.FINE_TUNE_DECAY_RATE
        self.summary_writer_log_dir = cfg.FINE_TUNE_SUMMARY_WRITER_LOG_DIR
        self.checkpoint_dir = cfg.FINE_TUNE_CHECKPOINT_DIR
        self.checkpoint_path = os.path.join(self.checkpoint_dir, cfg.FINE_TUNE_CHECKPOINT_FILENAME)
        self.max_steps = cfg.FINE_TUNE_MAX_STEP
        self.summary_step = cfg.FINE_TUNE_SUMMARY_STEP
        self.checkpoint_step = cfg.FINE_TUNE_CHECKPOINT_STEP
    def __get_or_create_global_step(self):
        # Fetch (or create) the global training-step variable.
        self.global_step = tf.train.get_or_create_global_step()
    def __create_tf_train_op(self):
        # Build the optimizer / training op.
        if self.is_training:
            with tf.variable_scope("train"):
                # Exponentially decayed learning rate for the updates.
                self.learning_rate = tf.train.exponential_decay(
                    learning_rate=self.initial_learning_rate,
                    global_step=self.global_step,
                    decay_steps=self.decay_steps,
                    decay_rate=self.decay_rate,
                    name='learning_rate')
                tf.summary.scalar('learning_rate', self.learning_rate)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate) \
                    .minimize(self.net.total_loss, global_step=self.global_step)
                if self.is_regression:
                    # No exponential moving average of the parameters during
                    # regression training.
                    self.train_op = self.optimizer
                else:
                    # Add an exponential-moving-average op on the parameters,
                    # i.e. smooth them over the most recent update batches.
                    self.ema = tf.train.ExponentialMovingAverage(0.99)
                    with tf.control_dependencies([self.optimizer]):
                        self.train_op = self.ema.apply(tf.trainable_variables())
    def __create_tf_saver(self):
        # Model persistence: keep at most the two most recent checkpoints.
        self.saver = tf.train.Saver(max_to_keep=2)
    def __create_tf_summary(self):
        # Visualization (TensorBoard) objects.
        self.summary = tf.summary.merge_all()
        self.writer = tf.summary.FileWriter(self.summary_writer_log_dir, graph=tf.get_default_graph())
    def __create_tf_session_and_initial(self):
        # Build the session.
        self.session = tf.Session()
        # If a checkpoint exists, restore the model from it.
        ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("进行模型恢复操作...")
            # Restore the model weights.
            self.saver.restore(self.session, ckpt.model_checkpoint_path)
            # Restore the checkpoint bookkeeping information.
            self.saver.recover_last_checkpoints(ckpt.all_model_checkpoint_paths)
        else:
            # No checkpoint: initialize all variables and train from scratch.
            self.session.run(tf.global_variables_initializer())
            print('未有模型持久化文件,从头开始训练!')
    def __svm_train(self):
        """
        Train the SVM models (delegates to the SVMModel instance).
        :return:
        """
        self.net.train()
    def __fine_tune_train(self):
        if not self.is_training:
            raise Exception("Train method request set 'is_training' parameter is True.")
        # Starting step (resumes from the restored global step).
        start_step = self.session.run(self.global_step)
        # Final step of this run.
        end_step = start_step + self.max_steps
        # Training loop.
        for step in range(start_step, end_step):
            # 1. Fetch a batch of data.
            images, labels = self.data_loader.get_fine_tune_batch()
            # 2. Train the model on it.
            feed_dict = {self.net.input_data: images, self.net.label: labels}
            # 3. Periodically emit summaries for visualization.
            if step % self.summary_step == 0:
                summary_, loss_, accuracy_, _ = self.session.run(
                    [self.summary, self.net.total_loss, self.net.accuracy, self.train_op],
                    feed_dict=feed_dict)
                self.writer.add_summary(summary_, global_step=step)
                print("Training Step:{}, Loss:{}, Accuracy:{}".format(step, loss_, accuracy_))
            else:
                self.session.run(self.train_op, feed_dict=feed_dict)
            # 4. Periodically checkpoint the model.
            if (step+1) % self.checkpoint_step == 0:
                print("Saving model to {}".format(self.checkpoint_dir))
                self.saver.save(sess=self.session, save_path=self.checkpoint_path, global_step=step)
        # Save the final model after training.
        # NOTE(review): `step` is the loop variable; this raises NameError if
        # max_steps is 0 (empty loop) — confirm max_steps is always positive.
        print("Saving model to {}".format(self.checkpoint_dir))
        self.saver.save(sess=self.session, save_path=self.checkpoint_path, global_step=step)
    def __regression_train(self):
        if not self.is_training:
            raise Exception("Train method request set 'is_training' parameter is True.")
        # Starting step (resumes from the restored global step).
        start_step = self.session.run(self.global_step)
        # Final step of this run.
        end_step = start_step + self.max_steps
        # Training loop.
        for step in range(start_step, end_step):
            # 1. Fetch a batch of data.
            images, labels = self.data_loader.get_regression_box_train_batch()
            # 2. Train the model on it.
            feed_dict = {self.net.input_data: images, self.net.label: labels}
            # 3. Periodically emit summaries for visualization.
            if step % self.summary_step == 0:
                summary_, loss_, _ = self.session.run(
                    [self.summary, self.net.total_loss, self.train_op],
                    feed_dict=feed_dict)
                self.writer.add_summary(summary_, global_step=step)
                print("Training Step:{}, Loss:{}".format(step, loss_))
            else:
                self.session.run(self.train_op, feed_dict=feed_dict)
            # 4. Periodically checkpoint the model.
            if (step+1) % self.checkpoint_step == 0:
                print("Saving model to {}".format(self.checkpoint_dir))
                self.saver.save(sess=self.session, save_path=self.checkpoint_path, global_step=step)
        # Save the final model after training.
        # NOTE(review): same NameError risk as __fine_tune_train if max_steps is 0.
        print("Saving model to {}".format(self.checkpoint_dir))
        self.saver.save(sess=self.session, save_path=self.checkpoint_path, global_step=step)
    def __fine_tune_predict(self, images):
        """
        Run the fine-tuned model and return its logits, which serve as the
        high-level feature input for the SVM/regression models.
        :param images: batch of candidate-box image crops.
        """
        return self.session.run(self.net.logits, feed_dict={self.net.input_data: images})
    def __persistent_svm_higher_features(self):
        """
        Persist the high-level feature data used to train the SVM models.
        Note: one SVM is trained per class, so one training-data file is
        produced for every class.
        :return:
        """
        # 1. Load the label dictionary (class name -> index).
        check_directory(cfg.TRAIN_LABEL_DICT_FILE_PATH, created=False, error=True)
        class_name_2_index_dict = pickle.load(open(cfg.TRAIN_LABEL_DICT_FILE_PATH, 'rb'))
        # 2. Iterate over every label.
        for class_name, index in class_name_2_index_dict.items():
            print("Start process type '{}/{}' datas...".format(index, class_name))
            X = None
            Y = None
            # a. Collect all training data for the current label.
            while self.data_loader.has_next_structure_higher_features_batch(index):
                # 1. Fetch the current batch.
                images, labels = self.data_loader.get_structure_higher_features(label=index)
                # 2. Skip empty batches.
                if images is None or labels is None:
                    continue
                # 3. Run the fine-tuned network to get the high-level features.
                print(np.shape(images), np.shape(labels))
                higher_features = self.__fine_tune_predict(images)
                # 4. Accumulate.
                if X is None:
                    X = higher_features
                    Y = labels
                else:
                    X = np.append(X, higher_features, axis=0)
                    Y = np.append(Y, labels)
            print("Final Feature Attribute Structure:{} - {}".format(np.shape(X), np.shape(Y)))
            print("Number of occurrences of each category:{}".format(collections.Counter(Y)))
            # e. Persist the data.
            # Concatenate features and labels column-wise.
            data = np.concatenate((X, np.reshape(Y, (-1, 1))), axis=1)
            # Build the output file path.
            svm_higher_features_save_path = cfg.TRAIN_SVM_HIGHER_FEATURES_DATA_FILE_PATH.format(index)
            check_directory(os.path.dirname(svm_higher_features_save_path))
            # Write the data out.
            np.save(svm_higher_features_save_path, data)
    def __persistent_regression_higher_features(self):
        """
        Persist the high-level feature data used to train the regression model:
        every candidate box whose IoU with the ground truth exceeds 0.6.
        NOTE(review): dead code — __init__ always binds the "_new" variant below.
        :return:
        """
        X = None
        Y = None
        print("开始生成回归用的高阶特征!")
        # a. Collect all training data.
        while self.data_loader.has_next_regression_box_batch():
            # 1. Fetch the current batch.
            images, labels = self.data_loader.get_regression_box_batch()
            # 2. Skip empty batches.
            if images is None or labels is None:
                continue
            # 3. Run the fine-tuned network to get the high-level features.
            print(np.shape(images), np.shape(labels))
            higher_features = self.__fine_tune_predict(images)
            # 4. Accumulate.
            if X is None:
                X = higher_features
                Y = labels
            else:
                X = np.append(X, higher_features, axis=0)
                Y = np.append(Y, labels, axis=0)
        print("Final Feature Attribute Structure:{} - {}".format(np.shape(X), np.shape(Y)))
        # e. Persist the data.
        # Concatenate features and targets column-wise.
        data = np.concatenate((X, Y), axis=1)
        # Build the output file path.
        regression_higher_features_save_path = cfg.TRAIN_REGRESSION_HIGHER_FEATURES_DATA_FILE_PATH
        check_directory(os.path.dirname(regression_higher_features_save_path))
        # Write the data out.
        np.save(regression_higher_features_save_path, data)
    def __persistent_regression_higher_features_new(self):
        """
        Persist the high-level feature data used to train the regression model:
        every candidate box whose IoU with the ground truth exceeds 0.6.
        This variant indexes the loader's preloaded arrays batch by batch.
        :return:
        """
        X = None
        Y = None
        print("开始生成回归用的高阶特征!")
        # a. Walk through the sample indices in batches.
        for batch_start_id in range(0, len(self.data_loader.regression_box_higher_features_samples_index),
                                    self.data_loader.regression_box_batch_size):
            batch_index = self.data_loader.regression_box_higher_features_samples_index[
                          batch_start_id: batch_start_id + self.data_loader.regression_box_batch_size]
            # 1. Fetch the current batch.
            images = self.data_loader.fine_tune_x[batch_index]
            labels = self.data_loader.regression_box_higher_features_Y[batch_index]
            # 2. Skip empty batches.
            if images is None or labels is None:
                continue
            # 3. Run the fine-tuned network to get the high-level features.
            print(np.shape(images), np.shape(labels))
            higher_features = self.__fine_tune_predict(images)
            # 4. Accumulate.
            if X is None:
                X = higher_features
                Y = labels
            else:
                X = np.append(X, higher_features, axis=0)
                Y = np.append(Y, labels, axis=0)
        print("Final Feature Attribute Structure:{} - {}".format(np.shape(X), np.shape(Y)))
        # e. Persist the data.
        # Concatenate features and targets column-wise.
        data = np.concatenate((X, Y), axis=1)
        # Build the output file path.
        regression_higher_features_save_path = cfg.TRAIN_REGRESSION_HIGHER_FEATURES_DATA_FILE_PATH
        check_directory(os.path.dirname(regression_higher_features_save_path))
        # Write the data out.
        np.save(regression_higher_features_save_path, data)
def run_solver(flag=5):
    """Dispatch to one stage of the R-CNN pipeline.

    :param flag: Which stage to run; values mirror ``SolverType``
        (0 = fine-tune training, 1 = dump SVM features, 2 = train SVMs,
        3 = dump regression features, 4 = train regression model,
        5 = end-to-end bounding-box prediction demo). Defaults to 5,
        matching the original hard-coded behavior.
    :return: None — results are printed, saved to disk, or shown on screen.
    """
    if flag == 0:
        solver = Solver(SolverType.TRAIN_FINE_TUNE_MODEL)
        solver.run()
    elif flag == 1:
        solver = Solver(SolverType.GENERATE_TRAIN_SVM_FEATURES)
        solver.run()
    elif flag == 2:
        solver = Solver(SolverType.TRAIN_SVM_MODEL)
        solver.run()
    elif flag == 3:
        solver = Solver(SolverType.GENERATE_TRAIN_REGRESSION_FEATURES)
        solver.run()
    elif flag == 4:
        solver = Solver(SolverType.TRAIN_REGRESSION_MODEL)
        solver.run()
    elif flag == 5:
        # 1. Fetch the Selective Search candidate boxes for one image.
        file_path = '../rcnn_datas/2flowers/jpg/t1.jpg'
        print("获取候选框.....")
        t1 = time.time()
        solver = Solver(SolverType.PREDICT_BOUNDING_BOX_STEP1)
        # images: all candidate-box crops of this image, shape [total_roi_samples, H, W, C].
        # bounding_boxs: matching coordinates, shape [total_roi_samples, 4], ordered as
        # [top-left x, top-left y, bottom-right x, bottom-right y]; images[i] and
        # bounding_boxs[i] describe the same RoI.
        images, bounding_boxs = solver.run(file_path)
        print("候选框获取完成.....")
        print(time.time() - t1)
        print(np.shape(images))
        print(np.shape(bounding_boxs))
        # 2. Extract the high-level features of every candidate box.
        print("开始获取高阶特征....")
        solver = Solver(SolverType.PREDICT_BOUNDING_BOX_STEP2)
        # svm_higher_features: FC7 output, shape [total_roi_samples, 4096], for the SVMs.
        # regression_higher_features: Conv5 output, shape [total_roi_samples, 9216],
        # for the bounding-box regression model.
        svm_higher_features, regression_higher_features = solver.run(images)
        print('预测step2 获取高阶特征结果:')
        print(np.shape(svm_higher_features))
        print(np.shape(regression_higher_features))
        # 3. Classify each box with the per-class SVMs.
        solver = Solver(SolverType.PREDICT_BOUNDING_BOX_STEP3)
        # predict_svm: boxes that may contain an object, shape [K, 3] —
        # column 0 = box index, column 1 = class label, column 2 = probability.
        predict_svm = solver.run(svm_higher_features)
        print('step3 svm分类预测结果为:', predict_svm)
        # Guard: with no accepted box, predict_svm[:, 0] would raise on an
        # empty array; there is nothing to regress or draw in that case.
        if np.size(predict_svm) == 0:
            print("No candidate box passed the SVM probability threshold; nothing to show.")
            return
        # 4. Predict regression offsets for the boxes the SVMs accepted.
        # np.int was removed in NumPy 1.24 — use the builtin int instead.
        box_indexs = predict_svm[:, 0].astype(int)  # indices of boxes confirmed as objects
        boxs = np.reshape(bounding_boxs[box_indexs], (-1, np.shape(bounding_boxs)[1]))
        # High-level features of the confirmed boxes only: regression is applied
        # solely to boxes believed to contain an object.
        rhf = regression_higher_features[box_indexs]
        rhf = np.reshape(rhf, (-1, np.shape(regression_higher_features)[1]))  # keep 2-D even with a single box
        solver = Solver(SolverType.PREDICT_BOUNDING_BOX_STEP4)
        # offset_box: regression transform coefficients per confirmed box,
        # shape [K, 4] — one (tx, ty, tw, th) quadruple per box.
        offset_box = solver.run(rhf)
        print(offset_box)
        # 5. Apply the offsets to the RoI coordinates to obtain refined boxes.
        final_box = calc_ground_truth_predict_bounding_box(boxs, offset_box)
        print("SS产生的边框(SVM确定属于物体):\n{}".format(boxs))
        print("边框值(微调):\n{}".format(final_box))
        # 6. Optional non-maximum suppression, then visualization.
        if cfg.SVM_PREDICT_PROBABILITY_WITH_NMS:
            boxes_probs = predict_svm[:, 2]  # per-box class probability
            boxes_labels = predict_svm[:, 1].astype(int)  # per-box class index (builtin int: np.int removed in NumPy 1.24)
            final_boxes_index, final_boxes_prob = nms(
                boxes=final_box,  # box coordinates, shape [K, 4]
                probs=boxes_probs,  # objectness probabilities, shape [K,]
                boxes_labels=boxes_labels,  # class id per box, shape [K,]
                prob_threshold=cfg.SVM_PREDICT_PROBABILITY_THRESHOLD  # keep only boxes above this probability
            )
            print(final_boxes_index)
            print("NMS微调之后的边框值:\n{}".format(final_box[final_boxes_index]))
            print(final_boxes_prob)
            # Boxes that survive NMS.
            nms_final_boxs = final_box[final_boxes_index]
            print("最终的实际边框信息")
            print(nms_final_boxs)  # final box coordinates [M, 4]
            print(boxes_labels[final_boxes_index])  # class id of each final box [M,]
            print(boxes_probs[final_boxes_index])  # class probability of each final box [M,]
            # Visualize: compare the boxes before (blue) and after (red) regression.
            show_image = cv.imread(file_path)
            for roi, bb_roi in zip(boxs, final_box):
                # Box before the regression transform.
                cv.rectangle(show_image, pt1=(roi[0], roi[1]),
                             pt2=(roi[2], roi[3]), color=(255, 0, 0), thickness=2)
                # Box after the regression transform.
                cv.rectangle(show_image, pt1=(bb_roi[0], bb_roi[1]),
                             pt2=(bb_roi[2], bb_roi[3]), color=(0, 0, 255), thickness=2)
                # TODO: draw a line between the centers of the two rectangles.
            label_2_name = {
                1: "DIR0",
                2: "DIR1"
            }
            label_2_color = {
                1: (0, 255, 255),
                2: (200, 0, 100)
            }
            for idx in final_boxes_index:
                # Final predicted object box.
                roi = final_box[idx]  # box coordinates
                label = boxes_labels[idx]  # class id
                prob = boxes_probs[idx]  # class probability
                color = label_2_color[label]
                cv.rectangle(show_image, pt1=(roi[0], roi[1]), pt2=(roi[2], roi[3]),
                             color=color, thickness=2)
                cv.putText(show_image, text='%s:%.3f' % (label_2_name[label], prob),
                           org=(roi[0] + 10, roi[1] + 10), fontFace=cv.FONT_HERSHEY_SIMPLEX,
                           fontScale=1, color=color, thickness=2, lineType=cv.LINE_AA)
            cv.imwrite('./result.png', show_image)
            cv.imshow('show_image', show_image)
            cv.waitKey(0)
            cv.destroyAllWindows()
        else:
            # No NMS here — __fetch_svm_predict already kept only the
            # highest-probability box per class. Visualize the boxes before
            # (blue) and after (red) regression.
            show_image = cv.imread(file_path)
            for roi, bb_roi in zip(boxs, final_box):
                cv.rectangle(show_image, pt1=(roi[0], roi[1]),
                             pt2=(roi[2], roi[3]), color=(255, 0, 0), thickness=2)
                cv.rectangle(show_image, pt1=(bb_roi[0], bb_roi[1]),
                             pt2=(bb_roi[2], bb_roi[3]), color=(0, 0, 255), thickness=2)
                # TODO: draw a line between the centers of the two rectangles.
            cv.imwrite('./result.png', show_image)
            cv.imshow('show_image', show_image)
            cv.waitKey(0)
            cv.destroyAllWindows()
if __name__ == '__main__':
    # Script entry point: runs the stage selected by run_solver's default flag.
    run_solver()
# Source: CSDN article "CV-1-目标检测-03-RCNN-03-train_and_predict" (published 2023-05-09).