R-CNN原理详解与代码超详细讲解(三)——data_utils代码讲解
check_directory代码详解
def check_directory(path, created=True, error=False):
"""
检查文件或者文件夹路径path是否存在,如果不存在,根据参数created和error进行操作<br/>
要求created和error只能有一个为True
:param path:
:param created: 当文件夹不存在的时候,进行创建
:param error: 当path不存在的时候,报错
:return:
"""
flag = os.path.exists(path)
if not flag:
if created:
os.makedirs(path)
flag = True
elif error:
raise Exception("Path must exists!!{}".format(path))
return flag
resize_image代码详解
def resize_image(in_image, new_width, new_height, out_image=None, resize_mode=cv.INTER_CUBIC):
    """Resize an image, optionally writing the result to disk.

    :param in_image: input image (ndarray as read by cv2)
    :param new_width: target width in pixels
    :param new_height: target height in pixels
    :param out_image: optional output file path; when given, the resized
        image is also written there with ``cv.imwrite``
    :param resize_mode: cv2 interpolation flag (default: bicubic)
    :return: the resized image
    """
    # BUG FIX: the third positional argument of cv2.resize is `dst`, not the
    # interpolation flag, so the original call silently ignored `resize_mode`.
    # It must be passed by keyword.
    image = cv.resize(in_image, (new_width, new_height), interpolation=resize_mode)
    if out_image:
        cv.imwrite(out_image, image)
    return image
iou代码详解
def iou(box1, box2):
    """Compute the Intersection-over-Union of two axis-aligned boxes.

    Each box is given as [x_min, y_min, x_max, y_max].

    :param box1: coordinates of the first box
    :param box2: coordinates of the second box
    :return: intersection area divided by union area (float)
    """
    # Overlap extent along each axis; zero or negative means the boxes
    # do not overlap on that axis (touching edges count as no overlap).
    overlap_w = min(box1[2], box2[2]) - max(box1[0], box2[0])
    overlap_h = min(box1[3], box2[3]) - max(box1[1], box2[1])
    if overlap_w <= 0 or overlap_h <= 0:
        inter_area = 0.0
    else:
        inter_area = overlap_w * overlap_h
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    # union = area1 + area2 - intersection
    return 1.0 * inter_area / (area1 + area2 - inter_area)
make_training_data代码详解
def make_training_data(in_file, output_data_file, output_label_file, image_width=227, image_height=227):
    """Build the raw training dataset: extract ROI proposal regions (via
    selective search) and the ground-truth box from each training image,
    and save them together with their labels/IoU/regression targets.

    :param in_file: txt file listing the original training samples
    :param output_data_file: path where the extracted feature + target records are saved (np.save)
    :param output_label_file: path where the class-name -> id mapping is pickled
    :param image_width: final width of each ROI image
    :param image_height: final height of each ROI image
    :return: None
    """
    # The input listing must already exist; output directories are created on demand.
    check_directory(in_file, created=False, error=True)
    check_directory(os.path.dirname(output_data_file))
    check_directory(os.path.dirname(output_label_file))
    # Image paths in the listing are relative to the listing file's directory.
    root_dir = os.path.dirname(os.path.abspath(in_file))
    # Class ids are assigned on first sight, starting at 1 (0 is reserved
    # for background/negative samples elsewhere in this file).
    class_name_2_index_dict = {}
    current_class_index = 1
    '''训练数据格式: 2flowers/jpg/0/image_0561.jpg 2 90,126,350,434'''
    # Each line: "<relative image path> <class name> <x,y,w,h of ground-truth box>"
    with open(in_file, 'r', encoding='utf-8') as reader:
        datas = []
        for line in reader:
            values = line.strip().split(" ")
            if len(values) != 3:
                # Skip malformed lines.
                continue
            image_file_path = os.path.join(root_dir, values[0])
            class_name = values[1].strip()
            # EAFP: look up the class id, assigning a new one on first sight.
            try:
                image_label = class_name_2_index_dict[class_name]
            except KeyError:
                image_label = current_class_index
                class_name_2_index_dict[class_name] = image_label
                current_class_index += 1
            # Ground-truth box given as top-left corner + width/height;
            # convert to corner coordinates and (integer) center.
            l_x, l_y, gw, gh = list(map(int, values[2].split(",")))
            r_x = l_x + gw
            r_y = l_y + gh
            gx = (l_x + r_x) // 2
            gy = (l_y + r_y) // 2
            image = cv.imread(image_file_path)
            ground_truth_image = image[l_y:r_y, l_x:r_x]
            # Selective search yields candidate regions, each with a
            # 'rect' (x, y, w, h) and a 'size'.
            _, regions = selective_search(image, scale=500, sigma=0.9, min_size=10)
            candidate = set()
            for idx, region in enumerate(regions):
                rect = region['rect']
                size = region['size']
                # Proposal box: corners and (integer) center, mirroring the
                # ground-truth computation above.
                lr_x, lr_y, pw, ph = rect
                rr_x = lr_x + pw
                rr_y = lr_y + ph
                px = (lr_x + rr_x) // 2
                py = (lr_y + rr_y) // 2
                # Filter duplicates and tiny regions.
                if rect in candidate:
                    continue
                if size < 200:
                    continue
                if pw * ph < 500:
                    continue
                candidate.add(rect)
                region_proposal = image[lr_y:rr_y, lr_x:rr_x]
                # IoU between the ground-truth box and this proposal.
                region_iou = iou(
                    box1=[l_x, l_y, r_x, r_y],
                    box2=[lr_x, lr_y, rr_x, rr_y]
                )
                region_proposal = resize_image(region_proposal,
                                               new_width=image_width,
                                               new_height=image_height)
                # Bounding-box regression targets: normalized center offset
                # and log size ratio of ground truth relative to the proposal.
                tx = (gx - px) / pw
                ty = (gy - py) / ph
                tw = np.log(gw / pw)
                th = np.log(gh / ph)
                offset_box = [tx, ty, tw, th]
                # Record layout: [image, label, box_type, iou, offsets];
                # box_type 1 = selective-search proposal.
                data = []
                data.append(region_proposal)
                data.append(image_label)
                data.append(1)
                data.append(region_iou)
                data.append(offset_box)
                datas.append(data)
            # Also record the ground-truth crop itself: box_type 0,
            # IoU 1.0, zero regression offsets.
            data = []
            ground_truth_image = resize_image(ground_truth_image,
                                              new_width=image_width,
                                              new_height=image_height)
            data.append(ground_truth_image)
            data.append(image_label)
            data.append(0)
            data.append(1.0)
            data.append([0, 0, 0, 0])
            datas.append(data)
    np.save(output_data_file, datas)
    with open(output_label_file, 'wb') as writer:
        pickle.dump(class_name_2_index_dict, writer)
FlowerDataLoader代码详解
class FlowerDataLoader(object):
    '''Data generator: loads the saved training records and serves batches
    for fine-tuning, plus per-class sample sets for the downstream stage.'''

    def __init__(self, one_hot=True):
        """Load (building first if necessary) the training data file and
        split sample indices into fine-tune positive/negative pools and
        per-label "higher features" pools.

        :param one_hot: when True, fine-tune labels are one-hot encoded
        """
        self.image_width = cfg.IMAGE_WIDTH
        self.image_height = cfg.IMAGE_HEIGHT
        self.fine_tune_positive_batch_size = cfg.FINE_TUNE_POSITIVE_BATCH_SIZE
        self.fine_tune_negative_batch_size = cfg.FINE_TUNE_NEGATIVE_BATCH_SIZE
        self.fine_tune_iou_threshold = cfg.FINE_TUNE_IOU_THRESHOLD
        # Fixed IoU threshold below which a record counts as a negative
        # for the "higher features" pools.
        self.higher_features_iou_threshold = 0.3
        fine_tune_X = []
        fine_tune_Y = []
        total_fine_tune_samples = 0
        fine_tune_positive_samples = 0
        fine_tune_negative_samples = 0
        fine_tune_positive_samples_index = []
        fine_tune_negative_samples_index = []
        higher_features_Y = []
        # label -> list of record indices (negatives below the IoU
        # threshold are indexed under their record's label as well).
        higher_features_label_2_samples_index = defaultdict(list)
        higher_features_label_2_negative_samples = 0
        higher_features_label_2_positive_samples = 0
        print("Start load training data.....")
        if not check_directory(cfg.TRAIN_DATA_FILE_PATH, False, False):
            '''如果training_data文件不存在,那么就进行make_training_data'''
            # Training data file does not exist yet -> build it first.
            print("Training data file not exists, So load traning data and save to file.....")
            make_training_data(in_file=cfg.ORIGINAL_FINE_TUNE_DATA_FILE_PATH,
                               output_data_file=cfg.TRAIN_DATA_FILE_PATH,
                               output_label_file=cfg.TRAIN_LABEL_DICT_FILE_PATH,
                               image_width=self.image_width, image_height=self.image_height)
        # allow_pickle is required: records hold Python lists/objects.
        datas = np.load(cfg.TRAIN_DATA_FILE_PATH, allow_pickle=True)
        # Record layout (see make_training_data): image, label, box_type
        # (0 = ground-truth crop, 1 = proposal), iou, offset box.
        for idx, (image, label, box_type, region_iou, box) in enumerate(datas):
            fine_tune_X.append(image)
            total_fine_tune_samples += 1
            # Fine-tune split: above the IoU threshold keeps its class
            # label, otherwise it is relabeled 0 (background).
            if region_iou > self.fine_tune_iou_threshold:
                fine_tune_Y.append(label)
                fine_tune_positive_samples_index.append(idx)
                fine_tune_positive_samples += 1
            else:
                fine_tune_Y.append(0)
                fine_tune_negative_samples_index.append(idx)
                fine_tune_negative_samples += 1
            # Higher-features split: low-IoU records become label-0
            # negatives but are still indexed under their original label
            # (presumably so each per-class trainer can fetch its own
            # negatives -- NOTE(review): confirm against the SVM stage).
            if region_iou < self.higher_features_iou_threshold:
                higher_features_label_2_negative_samples += 1
                higher_features_Y.append(0)
                higher_features_label_2_samples_index[label].append(idx)
            else:
                higher_features_Y.append(label)
                # Only ground-truth crops are indexed as positives here.
                if int(box_type) == 0:
                    higher_features_label_2_positive_samples += 1
                    higher_features_label_2_samples_index[label].append(idx)
        print("Complete load training data!!!! Total samples:{}".format(total_fine_tune_samples))
        print("Fine tune positive example:{}, negative example:{}".format(fine_tune_positive_samples,
                                                                          fine_tune_negative_samples))
        print("Higher Features positive sample:{}, negative example:{}".format(higher_features_label_2_positive_samples,
                                                                               higher_features_label_2_negative_samples))
        print('*' * 30)
        print('higher_features_label_2_samples_index:', higher_features_label_2_samples_index)
        self.fine_tune_x = np.asarray(fine_tune_X)
        if one_hot:
            # NOTE(review): `sparse=` was renamed to `sparse_output=` in
            # scikit-learn 1.2+; this call assumes an older sklearn.
            one_hot_encoder = OneHotEncoder(sparse=False, categories='auto')
            self.fine_tune_y = np.asarray(one_hot_encoder.fit_transform(np.reshape(fine_tune_Y, (-1, 1))))
            pass
        else:
            self.fine_tune_y = np.asarray(fine_tune_Y).reshape((-1, 1))
        self.total_fine_tune_samples = total_fine_tune_samples
        self.fine_tune_positive_samples = fine_tune_positive_samples
        self.fine_tune_negative_samples = fine_tune_negative_samples
        # Batch cursors for the positive/negative pools; the index arrays
        # are shuffled up front and reshuffled on every epoch reset.
        self.fine_tune_positive_cursor = 0
        self.fine_tune_negative_cursor = 0
        self.fine_tune_positive_samples_index = np.asarray(fine_tune_positive_samples_index)
        self.fine_tune_negative_samples_index = np.asarray(fine_tune_negative_samples_index)
        np.random.shuffle(self.fine_tune_positive_samples_index)
        np.random.shuffle(self.fine_tune_negative_samples_index)
        print('fine_tune_positive_samples_index:', fine_tune_positive_samples_index)
        print('fine_tune_negative_samples_index:', fine_tune_negative_samples_index)
        print('higher_features_Y', higher_features_Y)
        self.higher_features_y = np.asarray(higher_features_Y)
        self.higher_features_label_2_samples_index = higher_features_label_2_samples_index

    def __fetch_batch(self, batch_size, cursor, total_samples, x, y, index):
        """Fetch the batch at position *cursor* from the given index pool,
        and report whether the pool must be reset before the next fetch.

        :param batch_size: number of samples per batch
        :param cursor: current batch number within the epoch
        :param total_samples: size of the index pool
        :param x: full feature array (indexed by `index` entries)
        :param y: full label array (indexed by `index` entries)
        :param index: shuffled array of sample indices for this pool
        :return: (images, labels, need_reset_data); note the last batch of
            an epoch may be shorter than *batch_size* (the slice is simply
            truncated at the end of the pool)
        """
        need_reset_data = False
        start_idx = cursor * batch_size
        end_idx = start_idx + batch_size
        if end_idx >= total_samples:
            # Pool exhausted after this batch -> caller should reset the
            # cursor and reshuffle.
            need_reset_data = True
        sample_index = index[start_idx:end_idx]
        images = x[sample_index]
        labels = y[sample_index]
        return images, labels, need_reset_data

    def get_fine_tune_batch(self):
        """Fetch one positive and one negative batch (per the configured
        batch sizes) and return them concatenated.

        :return: (images, labels) stacked along axis 0
        """
        positive_images, positive_labels, flag = self.__fetch_batch(
            batch_size=self.fine_tune_positive_batch_size,
            cursor=self.fine_tune_positive_cursor,
            total_samples=self.fine_tune_positive_samples,
            x=self.fine_tune_x,
            y=self.fine_tune_y,
            index=self.fine_tune_positive_samples_index)
        if flag:
            # Positive pool exhausted: restart the epoch with a reshuffle.
            print("Reset fine tune positive samples!!!")
            self.fine_tune_positive_cursor = 0
            np.random.shuffle(self.fine_tune_positive_samples_index)
        else:
            self.fine_tune_positive_cursor += 1
        negative_images, negative_labels, flag = self.__fetch_batch(
            batch_size=self.fine_tune_negative_batch_size,
            cursor=self.fine_tune_negative_cursor,
            total_samples=self.fine_tune_negative_samples,
            x=self.fine_tune_x,
            y=self.fine_tune_y,
            index=self.fine_tune_negative_samples_index)
        if flag:
            # Negative pool exhausted: restart the epoch with a reshuffle.
            print("Reset fine tune negative samples!!!")
            self.fine_tune_negative_cursor = 0
            np.random.shuffle(self.fine_tune_negative_samples_index)
        else:
            self.fine_tune_negative_cursor += 1
        images = np.concatenate([positive_images, negative_images], axis=0)
        labels = np.concatenate([positive_labels, negative_labels], axis=0)
        return images, labels

    def get_structure_higher_features(self, label):
        """Return raw samples for training the per-class SVM stage.

        :param label: class id whose sample pool is requested
        :return: (images, labels) for at most the first 10 recorded indices
            of that label, or (None, None) when the label has no pool
        """
        if label in self.higher_features_label_2_samples_index:
            index = self.higher_features_label_2_samples_index[label]
            # Cap at 10 samples per class.
            index = index[:10]
            return self.fine_tune_x[index], self.higher_features_y[index]
        else:
            return None, None