yolov3前向传播（三）-- 坐标转换,iou计算,权重加载,图片显示

最新推荐文章于 2024-08-05 18:41:17 发布

淡定的炮仗

最新推荐文章于 2024-08-05 18:41:17 发布

阅读量2.1k

点赞数 2

文章标签： tensorflow 神经网络

本文链接：https://blog.csdn.net/m0_43609475/article/details/107696062

版权

坐标转换,iou计算,权重加载,图片显示

一、坐标转换

1、分析

在这里插入图片描述

2、实现

# =====================================坐标值转化函数==========================================================
# 定义函数：将中心点、高、宽坐标 转化为[x0, y0, x1, y1]坐标形式
# 将anchor的中心点坐标表示形式转化为左上右下坐标表示形式
def detections_boxes(detections):
    '''
    Convert boxes from centre/size form to corner form.

    :param detections: output of the YOLO detection layer. The last axis is
        split as [center_x, center_y, width, height, remaining attributes];
        the original notes give the shape as (1, 10647, 5 + num_class).
    :return: tensor with the same layout, except that the first four entries
        of the last axis are now [x0, y0, x1, y1] (the two opposite corners).
    '''
    # Peel the four geometry columns off the last axis; everything else
    # (confidence and class scores) is carried through untouched.
    center_x, center_y, width, height, attrs = tf.split(
        detections, [1, 1, 1, 1, -1], axis=-1)

    half_w = width / 2
    half_h = height / 2

    # Corner = centre -/+ half extent, laid out as x0, y0, x1, y1.
    corners = tf.concat(
        [center_x - half_w, center_y - half_h,
         center_x + half_w, center_y + half_h],
        axis=-1)

    # Re-attach the untouched attributes behind the converted coordinates.
    return tf.concat([corners, attrs], axis=-1)

二、IOU计算函数

1、分析

在这里插入图片描述

2、实现

# =====================================IOU计算函数==========================================================
# 定义函数计算两个框的重叠程度（IOU），box1、box2 为左上、右下的坐标 [x0, y0, x1, y1]
# 计算两个anchor的IOU值
def _iou(box1, box2):
    '''
    Compute the intersection-over-union (IoU) of two axis-aligned boxes.

    :param box1: first box as [x0, y0, x1, y1] (two opposite corners, with
        x0 <= x1 and y0 <= y1)
    :param box2: second box in the same format
    :return: IoU of the two boxes; 0 when they do not overlap
    '''
    b1_x0, b1_y0, b1_x1, b1_y1 = box1
    b2_x0, b2_y0, b2_x1, b2_y1 = box2

    # The intersection rectangle starts at the larger of the two (x0, y0)
    # corners and ends at the smaller of the two (x1, y1) corners.
    int_x0 = max(b1_x0, b2_x0)
    int_y0 = max(b1_y0, b2_y0)
    int_x1 = min(b1_x1, b2_x1)
    int_y1 = min(b1_y1, b2_y1)

    # Clamp the extents at zero. For disjoint boxes the raw differences are
    # negative; without clamping, two negative extents multiply into a bogus
    # positive area and disjoint boxes would look heavily overlapping.
    int_w = max(int_x1 - int_x0, 0)
    int_h = max(int_y1 - int_y0, 0)
    int_area = int_w * int_h

    b1_area = (b1_x1 - b1_x0) * (b1_y1 - b1_y0)
    b2_area = (b2_x1 - b2_x0) * (b2_y1 - b2_y0)

    # Union = sum of areas minus the doubly counted intersection.
    # The small epsilon keeps the division defined for zero-area boxes.
    iou = int_area / (b1_area + b2_area - int_area + 1e-05)
    return iou

三、非极大值抑制函数

1、分析

在这里插入图片描述

2、实现

# ====================================================== 使用非极大值抑制（NMS）方法，对结果去重===================================================
def non_max_suppression(predictions_with_boxes, confidence_threshold, iou_threshold=0.4):
    '''
    Drop low-confidence detections, then remove duplicates with per-class
    non-maximum suppression (NMS).

    :param predictions_with_boxes: NumPy array indexed as
        [image, anchor, attribute]; the attributes are read as
        [x0, y0, x1, y1, confidence, class scores...]. The original notes
        describe it as the detection output after detections_boxes.
    :param confidence_threshold: anchors whose confidence is not strictly
        greater than this value are discarded
    :param iou_threshold: a box is suppressed when its IoU with an already
        kept box of the same class is not below this value
    :return: dict mapping class index -> list of (box, score) tuples, where
        box is the [x0, y0, x1, y1] array and score is that box's confidence.
        Detections from every image in the batch are accumulated in this one
        dict.
    '''
    # Zero out every anchor whose confidence does not exceed the threshold.
    # The mask has a trailing axis of size 1 so it broadcasts over the row.
    conf_mask = np.expand_dims(
        predictions_with_boxes[:, :, 4] > confidence_threshold, -1)
    predictions = predictions_with_boxes * conf_mask

    result = {}
    for image_pred in predictions:
        # Keep only rows that still hold a non-zero value, i.e. the anchors
        # that survived the confidence filter. np.unique yields each row index
        # once, in ascending order.
        kept_rows = np.unique(np.nonzero(image_pred)[0])
        image_pred = image_pred[kept_rows]

        bbox_attrs = image_pred[:, :5]                   # corners + confidence
        classes = np.argmax(image_pred[:, 5:], axis=-1)  # best class per anchor

        for cls in np.unique(classes):
            # All surviving anchors of this class, highest confidence first.
            cls_boxes = bbox_attrs[classes == cls]
            cls_boxes = cls_boxes[cls_boxes[:, -1].argsort()[::-1]]
            cls_scores = cls_boxes[:, -1]
            cls_boxes = cls_boxes[:, :-1]

            while len(cls_boxes) > 0:
                # The head of the list is the most confident remaining box.
                box = cls_boxes[0]
                score = cls_scores[0]
                result.setdefault(cls, []).append((box, score))

                # Remove the kept box from BOTH arrays. Previously only
                # cls_boxes was shifted, so the mask below was applied to
                # un-shifted scores and every later box got the wrong score.
                cls_boxes = cls_boxes[1:]
                cls_scores = cls_scores[1:]

                # Suppress everything that overlaps the kept box too much.
                ious = np.array([_iou(box, x) for x in cls_boxes])
                keep = ious < iou_threshold
                cls_boxes = cls_boxes[keep]
                cls_scores = cls_scores[keep]

    return result

四、结果显示

即将检测结果显示在图片上

1、分析

在这里插入图片描述

2、实现

# 将检测结果显示在图片上
def draw_boxes(boxes, img, cls_names, detection_size):
    '''
    Draw the detected boxes and their labels onto ``img`` (in place) and
    print each label together with the position it was drawn at.

    :param boxes: dict of class index -> list of (box, score) pairs, e.g.
        {0: [(array([16., 30., 35., 45.]), 0.5)], 2: [(array([5., 8., 15., 14.]), 0.7)]}
    :param img: image to draw on; the original notes describe it as the
        original picture fed to the network (a PIL image is assumed since
        ImageDraw is used)
    :param cls_names: mapping from class index to class name
    :param detection_size: size the image was resized to for detection
        (416 according to the original notes)
    :return: None; the drawing happens directly on ``img``
    '''
    canvas = ImageDraw.Draw(img)

    for cls, detections in boxes.items():
        # One random RGB colour per class, shared by all of its boxes.
        color = tuple(np.random.randint(0, 256, 3))
        for box, score in detections:
            # Map the box from detection-size coordinates back onto the image.
            corners = convert_to_original_size(
                box, np.array(detection_size), np.array(img.size))
            canvas.rectangle(corners, outline=color)

            # Label "<class name> <confidence as a percentage>" at the first corner.
            label = '{} {:.2f}%'.format(cls_names[cls], score * 100)
            anchor_point = corners[:2]
            canvas.text(anchor_point, label, fill=color)
            print(label, anchor_point)

def convert_to_original_size(box, size, original_size):
    '''
    Rescale a box from detection-size coordinates to original-image
    coordinates.

    :param box: four coordinates [x0, y0, x1, y1]; an array or a plain
        sequence
    :param size: size the image had during detection; a scalar or an
        (x, y) pair (416 according to the original notes)
    :param original_size: size of the original image; a scalar or an
        (x, y) pair in the same axis order as the box coordinates
    :return: flat list [x0, y0, x1, y1] with every coordinate multiplied by
        original_size / size along its axis
    '''
    # Coerce everything to arrays so that plain ints, lists and tuples work
    # as well as NumPy arrays.
    ratio = np.asarray(original_size) / np.asarray(size)
    # View the box as two (x, y) points so the per-axis ratio broadcasts.
    scaled = np.asarray(box).reshape(2, 2) * ratio
    return list(scaled.reshape(-1))

五、权重加载

1、实现

def load_weights(var_list, weights_file):
    '''
    Build the assign ops that copy a binary weights file into the given
    variables.

    :param var_list: list of variables in network order. Each convolution
        kernel (a variable whose enclosing scope name contains 'Conv') is
        expected to be followed either by its four batch-norm variables
        (gamma, beta, mean, variance) or by its bias.
    :param weights_file: path to the binary weights file (the original notes
        describe it as darknet53-based YOLOv3 weights trained on COCO)
    :return: list of tf.assign ops, one per loaded variable
    '''
    with open(weights_file, "rb") as fp:
        # The first five int32 values are header information; skip them.
        np.fromfile(fp, dtype=np.int32, count=5)
        # Everything after the header is read as one flat float32 array.
        weights = np.fromfile(fp, dtype=np.float32)

    ptr = 0  # read position inside the flat weights array
    i = 0    # position inside var_list
    assign_ops = []
    while i < len(var_list) - 1:
        var1 = var_list[i]
        var2 = var_list[i + 1]

        # Only convolution kernels start a group. Step over anything else;
        # otherwise the index never advances and the loop never terminates.
        if 'Conv' not in var1.name.split('/')[-2]:
            i += 1
            continue

        next_scope = var2.name.split('/')[-2]
        if 'BatchNorm' in next_scope:
            # var_list holds gamma, beta, mean, variance; the values are
            # consumed from the file in the order beta, gamma, mean, variance.
            gamma, beta, mean, variance = var_list[i + 1:i + 5]
            for bn_var in (beta, gamma, mean, variance):
                shape = bn_var.shape.as_list()
                num_params = np.prod(shape)
                bn_weights = weights[ptr:ptr + num_params].reshape(shape)
                ptr += num_params
                assign_ops.append(tf.assign(bn_var, bn_weights, validate_shape=True))

            i += 4  # the four batch-norm variables have been consumed
        elif 'Conv' in next_scope:
            # No batch norm: the next variable is this convolution's bias.
            bias = var2
            bias_shape = bias.shape.as_list()
            bias_params = np.prod(bias_shape)
            bias_weights = weights[ptr:ptr + bias_params].reshape(bias_shape)
            ptr += bias_params
            assign_ops.append(tf.assign(bias, bias_weights, validate_shape=True))

            i += 1  # the bias has been consumed

        # The kernel values follow the batch-norm / bias values in the file.
        # They are stored with axes ordered (shape[3], shape[2], shape[0],
        # shape[1]) and transposed back to the variable's own axis order.
        shape = var1.shape.as_list()
        num_params = np.prod(shape)
        var_weights = weights[ptr:ptr + num_params].reshape(
            (shape[3], shape[2], shape[0], shape[1]))
        var_weights = np.transpose(var_weights, (2, 3, 1, 0))
        ptr += num_params
        assign_ops.append(tf.assign(var1, var_weights, validate_shape=True))
        i += 1

    return assign_ops