DeepLearning.ai code笔记4:卷积神经网络

1、卷积基本结构

Stride 步长: 滤波器在原始图片上每次水平或垂直移动的距离。

卷积大小的计算:滤波器大小为 $f$,步长为 $s$,填充为 $p$,输入图片为 $n \times n$,则卷积得到的特征图大小为(其中 $\lfloor \cdot \rfloor$ 表示向下取整):

$$\left\lfloor \frac{n + 2p - f}{s} + 1 \right\rfloor \times \left\lfloor \frac{n + 2p - f}{s} + 1 \right\rfloor$$

主要步骤:

  • Convolution functions, including:
    Zero Padding 零填充
    Convolve window 卷积窗口
    Convolution forward
    Convolution backward (optional)
  • Pooling functions, including:
    Pooling forward
    Create mask
    Distribute value
    Pooling backward (optional)

2、ResNet

这里写图片描述

3、YOLO

If you were to run your session in a for loop over all your images, here's what you would get:

YOLO is a state-of-the-art object detection model that is fast and accurate
It runs an input image through a CNN which outputs a 19x19x5x85 dimensional volume.
The encoding can be seen as a grid where each of the 19x19 cells contains information about 5 boxes.
You filter through all the boxes using non-max suppression. Specifically:
Score thresholding on the probability of detecting a class to keep only accurate (high probability) boxes
Intersection over Union (IoU) thresholding to eliminate overlapping boxes

使用非最大抑制(NMS)来过滤所有候选框,具体而言:先对检测到某一类别的概率进行分数阈值过滤,只保留准确(高概率)的框;再通过交并比(IoU)阈值过滤来消除重叠的框。

"""
@Author : Peng
@Time : 2018/3/23
info :  YOLO的使用, 80个类,5个anchor boxes,Feature Map(19*19)
"""
import numpy as np
import matplotlib.pyplot as plt
import keras
import tensorflow as tf
import os
import scipy.misc

from LearningDL.task4_3.yad2k.models import keras_darknet19, keras_yolo
from LearningDL.task4_3 import yolo_utils

import keras.backend as K

# Configure the Keras backend to use the 'channels_last' image data format
# for everything built after this point.
K.set_image_data_format('channels_last')


def yolo_filters_boxes(box_confidence, boxes, box_class_probs, threshold=0.6):
    """
    Keep only the candidate boxes whose best class score reaches ``threshold``.

    :param box_confidence: per-anchor-box confidence predicted by the network,
        documented by the author as shape [19, 19, 5, 1]
    :param boxes: per-anchor-box coordinates, documented as shape [19, 19, 5, 4]
    :param box_class_probs: per-anchor-box class probabilities, documented as
        shape [19, 19, 5, 80]
    :param threshold: minimum class score a box must reach to be kept
        (this is a score threshold, not an IoU threshold)
    :return: (scores, boxes, classes) of the boxes that passed the filter
    """
    # Per-class score = box confidence * class probability (broadcast over the last axis).
    class_scores = box_confidence * box_class_probs
    # Index of the highest-scoring class for every anchor box.
    best_class = K.argmax(class_scores, axis=-1)
    # The score of that class.
    best_score = K.max(class_scores, axis=-1)

    # Boolean mask over the anchor boxes; True where the best score is high enough.
    keep = (best_score >= threshold)

    # boolean_mask flattens the masked leading dimensions, so the number of
    # surviving boxes is only known at run time.
    return (tf.boolean_mask(best_score, keep),
            tf.boolean_mask(boxes, keep),
            tf.boolean_mask(best_class, keep))


def iou(box1, box2):
    """
    Compute the intersection over union (IoU) of two axis-aligned boxes.

    :param box1: (x1, y1, x2, y2) - top-left and bottom-right corner coordinates
    :param box2: (x1, y1, x2, y2) - top-left and bottom-right corner coordinates
    :return: intersection area divided by union area; 0 when the boxes do not
        overlap, and 0.0 when the union itself has zero area
    """
    # Corners of the candidate intersection rectangle ("squeeze" towards the middle).
    xi1 = max(box1[0], box2[0])
    yi1 = max(box1[1], box2[1])
    xi2 = min(box1[2], box2[2])
    yi2 = min(box1[3], box2[3])

    # Clamp at zero: for disjoint boxes these differences are negative, and taking
    # their absolute value would report a phantom overlap.
    inter_width = max(xi2 - xi1, 0)
    inter_height = max(yi2 - yi1, 0)
    inter_area = inter_width * inter_height

    box1_area = np.abs(box1[2] - box1[0]) * np.abs(box1[3] - box1[1])
    box2_area = np.abs(box2[2] - box2[0]) * np.abs(box2[3] - box2[1])
    union_area = box1_area + box2_area - inter_area

    # Two degenerate (zero-area) boxes: avoid dividing by zero.
    if union_area == 0:
        return 0.0

    return inter_area / union_area


def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10, iou_thsehold=0.5):
    """
    Apply non-max suppression to keep only the best, non-overlapping boxes.

    :param scores: shape (None,), class score of every candidate box
    :param boxes: shape (None, 4), corner coordinates of every candidate box
    :param classes: shape (None,), predicted class index of every candidate box
    :param max_boxes: int, maximum number of boxes to keep
    :param iou_thsehold: float, IoU threshold passed to the suppression op
        (the misspelled name is kept so existing keyword callers still work)
    :return: scores (None,), boxes (None, 4), classes (None,) of the kept boxes
    """
    # Scalar int32 tensor holding the maximum number of boxes; it must be
    # initialized in the session before the NMS op can read it.
    max_boxes_tensor = K.variable(max_boxes, dtype=tf.int32)
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))

    # Indices of the boxes that survive non-max suppression. The tensor created
    # above is now passed in; previously it was built but never used.
    nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor, iou_thsehold)

    # Select the surviving entries by index.
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)

    return scores, boxes, classes


def yolo_eval(yolo_outputs, image_shape=(720., 1280.), max_boxes=10, score_threshold=0.6, iou_threshold=0.5):
    """
    Turn the raw YOLO head outputs into the final scores, boxes and classes.

    The pre-trained model's output is still unfiltered and may contain
    overlapping anchor boxes, so score filtering and non-max suppression
    are applied here.

    :param yolo_outputs: 4-tuple (box_confidence, box_xy, box_wh, box_class_probs)
    :param image_shape: shape handed to ``yolo_utils.scale_boxes`` to rescale the boxes
    :param max_boxes: maximum number of boxes kept by non-max suppression
    :param score_threshold: minimum class score for a box to survive the first filter
    :param iou_threshold: IoU threshold used by non-max suppression
    :return: (scores, boxes, classes) after both filtering stages
    """
    confidence, centers, sizes, class_probs = yolo_outputs

    # Convert (center, width/height) boxes to corner coordinates.
    corner_boxes = keras_yolo.yolo_boxes_to_corners(centers, sizes)

    # Stage 1: drop boxes whose best class score is below the threshold.
    kept_scores, kept_boxes, kept_classes = yolo_filters_boxes(
        confidence, corner_boxes, class_probs, score_threshold)

    # Rescale the boxes to the image so they can be drawn on it.
    scaled_boxes = yolo_utils.scale_boxes(kept_boxes, image_shape)

    # Stage 2: non-max suppression removes overlapping boxes.
    return yolo_non_max_suppression(kept_scores, scaled_boxes, kept_classes, max_boxes, iou_threshold)


def unit_test():
    """Ad-hoc smoke tests for the YOLO post-processing helpers (only the yolo_eval check is active)."""
    # --- yolo_filters_boxes: filter candidate boxes by score threshold (disabled) ---
    # with tf.Session() as session:
    #     box_confidence = tf.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed=1)
    #     boxes = tf.random_normal([19, 19, 5, 4], mean=1, stddev=4, seed=1)
    #     box_class_probs = tf.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed=1)
    #     scores, boxes, classes = yolo_filters_boxes(box_confidence, boxes, box_class_probs, threshold=0.5)
    #     print("scores[2] = " + str(scores[2].eval()))
    #     print("boxes[2] = " + str(boxes[2].eval()))
    #     print("classes[2] = " + str(classes[2].eval()))
    #     print("scores.shape = " + str(scores.shape))
    #     print("boxes.shape = " + str(boxes.shape))
    #     print("classes.shape = " + str(classes.shape))

    # --- iou (disabled) ---
    # box1 = (2, 1, 4, 3)
    # box2 = (1, 2, 3, 4)
    # print('iou value is {}'.format(iou(box1, box2)))

    # --- yolo_non_max_suppression (disabled) ---
    # with tf.Session() as session:
    #     scores = tf.random_normal([54, ], mean=1, stddev=4, seed=1)
    #     boxes = tf.random_normal([54, 4], mean=1, stddev=4, seed=1)
    #     classes = tf.random_normal([54, ], mean=1, stddev=4, seed=1)
    #     scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes)
    #     print("scores[2] = " + str(scores[2].eval()))
    #     print("boxes[2] = " + str(boxes[2].eval()))
    #     print("classes[2] = " + str(classes[2].eval()))
    #     print("scores.shape = " + str(scores.eval().shape))
    #     print("boxes.shape = " + str(boxes.eval().shape))
    #     print("classes.shape = " + str(classes.eval().shape))

    # --- yolo_eval: full filtering pipeline on seeded random inputs ---
    with tf.Session():
        # One random tensor per YOLO head output:
        # confidence (…, 1), xy (…, 2), wh (…, 2), class probabilities (…, 80).
        last_dims = (1, 2, 2, 80)
        yolo_outputs = tuple(
            tf.random_normal([19, 19, 5, depth], mean=1, stddev=4, seed=1)
            for depth in last_dims
        )
        scores, boxes, classes = yolo_eval(yolo_outputs)

        results = (("scores", scores), ("boxes", boxes), ("classes", classes))
        # Print the third surviving entry of each result first ...
        for label, tensor in results:
            print(label + "[2] = " + str(tensor[2].eval()))
        # ... then the run-time shape of each result.
        for label, tensor in results:
            print(label + ".shape = " + str(tensor.eval().shape))


def predict(sess, image_file, scores, boxes, classes, yolo_model, class_name):
    """
    Run the YOLO graph on one image, draw the detected boxes and show the result.

    The image is read from ``images/<image_file>`` and the annotated copy is
    written to ``out/<image_file>`` (the output directory is created if missing).

    :param sess: TensorFlow session that holds the loaded model
    :param image_file: file name of the image inside the ``images`` directory
    :param scores: scores tensor produced by ``yolo_eval``
    :param boxes: boxes tensor produced by ``yolo_eval``
    :param classes: classes tensor produced by ``yolo_eval``
    :param yolo_model: loaded Keras model; its ``input`` is fed with the image data
    :param class_name: class names, used for colors and box labels
    :return: (out_scores, out_boxes, out_classes) as evaluated by the session
    """
    image, image_data = yolo_utils.preprocess_image("images/" + image_file, model_image_size=(608, 608))
    # learning_phase = 0 runs the model in inference mode.
    out_scores, out_boxes, out_classes = sess.run([scores, boxes, classes],
                                                  feed_dict={yolo_model.input: image_data, K.learning_phase(): 0})

    print("Found {} boxes for {}".format(len(out_boxes), image_file))
    colors = yolo_utils.generate_colors(class_name)
    yolo_utils.draw_boxes(image, out_scores, out_boxes, out_classes, class_name, colors)

    out_path = os.path.join("out", image_file)
    # Saving fails if the target directory does not exist yet.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    image.save(out_path, quality=90)

    # Display the saved result. scipy.misc.imread was removed from recent SciPy
    # releases, so the file is read back through matplotlib instead.
    output_image = plt.imread(out_path)
    plt.imshow(output_image)
    plt.show()

    return out_scores, out_boxes, out_classes


if __name__ == '__main__':
    # unit_test()

    # Run YOLO on a single test image.
    sess = K.get_session()
    class_name = yolo_utils.read_classes("model_data/coco_classes.txt")
    anchors = yolo_utils.read_anchors("model_data/yolo_anchors.txt")
    image_shape = (720., 1280.)

    # Load the pre-trained YOLO v2 model and print its layer summary.
    yolo_model = keras.models.load_model("model_data/yolo.h5")
    yolo_model.summary()

    # Convert the raw model output into the YOLO head outputs.
    yolo_outputs = keras_yolo.yolo_head(yolo_model.output, anchors, len(class_name))

    # Build the filtered prediction tensors (score filter + non-max suppression).
    scores, boxes, classes = yolo_eval(yolo_outputs, image_shape=image_shape)

    out_scores, out_boxes, out_classes = predict(
        sess=sess,
        image_file="test.jpg",
        scores=scores,
        boxes=boxes,
        classes=classes,
        yolo_model=yolo_model,
        class_name=class_name,
    )
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值