keras-yolo3图片检测代码讲解

最新推荐文章于 2024-08-08 07:51:05 发布

虎娃娃huwawa

最新推荐文章于 2024-08-08 07:51:05 发布

阅读量2.2k

点赞数

分类专栏： keras-yolo3

本文链接：https://blog.csdn.net/daisy_d_/article/details/106437983

版权

keras-yolo3 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

花了一段时间看keras版本利用模型进行图片检测的代码
代码链接：[https://github.com/qqwweee/keras-yolo3](https://github.com/qqwweee/keras-yolo3)
如有理解不到位的地方，希望批评指正。

还写得不够完整，以后有空还会补充，请多包涵！！！

用于图片检测主要包含以下文件：
在这里插入图片描述

首先声明：我用的是 tiny-yolo，num_anchors=6，类别数为 1。

yolo_test_batch.py

主要功能：将文件夹里的图片逐张输入进行检测，并将检测结果保存。
第一部分：`if __name__ == '__main__':` 部分；进行传参，调用 detect_img()。
第二部分：detect_img()

def detect_img(yolo):
    """Run detection on every JPEG in the test folder and save the results.

    Each file matching ``set/testimage/*.jpg`` is opened, passed to
    ``yolo.detect_image`` and the returned image is saved into
    ``set/testtimage`` under the same base name.  The session is always
    closed afterwards, even if one of the images fails.

    :param yolo: object providing ``detect_image(image)`` and
        ``close_session()``.
    """
    path = "set/testimage/*.jpg"  # glob pattern of the images to detect
    # NOTE(review): "testtimage" differs from the input folder by one letter;
    # confirm this is the intended output directory.
    outdir = "set/testtimage"     # directory receiving the annotated images
    try:
        # Create the output directory up front so saving cannot fail on a
        # missing folder.
        os.makedirs(outdir, exist_ok=True)
        # glob returns matches in arbitrary order; sort for a stable run order.
        for jpgfile in sorted(glob.glob(path)):
            # The context manager releases the file handle once we are done.
            with Image.open(jpgfile) as img:
                result = yolo.detect_image(img)  # detection + drawing
                result.save(os.path.join(outdir, os.path.basename(jpgfile)))
    finally:
        # Release the backend session whether or not detection succeeded.
        yolo.close_session()


FLAGS = None

yolo.py

定义yolo类

   def __init__(self, **kwargs):
        """Configure the detector and build its output tensors.

        The values in ``self._defaults`` are applied first, then ``kwargs``
        override them; afterwards class names, anchors, the backend session
        and the filtered output tensors are set up.
        """
        settings = vars(self)  # the instance attribute dict
        settings.update(self._defaults)  # start from the default values
        settings.update(kwargs)  # caller-supplied overrides win
        self.class_names = self._get_class()  # class names
        self.anchors = self._get_anchors()  # anchor boxes
        self.sess = K.get_session()
        outputs = self.generate()
        self.boxes, self.scores, self.classes = outputs

generate()得到检测的box，score，classes

    def generate(self):
        """Load the YOLO model and build the filtered output tensors.

        The model file is loaded with ``load_model``; if that fails, the
        network is rebuilt (tiny variant when there are 6 anchors) and only
        the weights are loaded.  Per-class drawing colours are generated and
        the outputs are wired through ``yolo_eval``.

        :return: ``(boxes, scores, classes)`` tensors produced by ``yolo_eval``.
        :raises AssertionError: if the path does not end in ``.h5`` or a fully
            loaded model does not match the anchor/class configuration.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.断言 模型必须是.h5文件'

        # Load model, or construct model and load weights.
        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)
        is_tiny_version = num_anchors == 6  # default setting
        try:
            self.yolo_model = load_model(model_path, compile=False)
        except Exception:
            # Only real errors trigger the fallback; KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            self.yolo_model = tiny_yolo_body(Input(shape=(None, None, 3)), num_anchors // 2, num_classes) \
                if is_tiny_version else yolo_body(Input(shape=(None, None, 3)), num_anchors // 3, num_classes)
            # Use the expanded path so a leading "~" also works here.
            self.yolo_model.load_weights(model_path)  # make sure model, anchors and classes match
        else:
            # The last layer's channel count must match anchors per output
            # times (num_classes + 5).
            assert self.yolo_model.layers[-1].output_shape[-1] == \
                   num_anchors / len(self.yolo_model.output) * (num_classes + 5), \
                'Mismatch between model and given anchor and class sizes'

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Generate colors for drawing bounding boxes: evenly spaced hues at
        # full saturation/value, converted to 0-255 RGB integer tuples.
        hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
        self.colors = [
            tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
            for hsv in hsv_tuples
        ]
        np.random.seed(10101)  # Fixed seed for consistent colors across runs.
        np.random.shuffle(self.colors)  # Shuffle colors to decorrelate adjacent classes.
        np.random.seed(None)  # Reset seed to default.

        # Generate output tensor targets for filtered bounding boxes.
        self.input_image_shape = K.placeholder(shape=(2,))  # fed per image at run time
        if self.gpu_num >= 2:
            self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
        boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
                                           num_classes, self.input_image_shape,
                                           score_threshold=self.score, iou_threshold=self.iou)
        return boxes, scores, classes

yolo_eval（）在model.py

处理网络输出返回boxes, scores, classes

def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes.

    :param yolo_outputs: list of raw output tensors, one per detection layer.
        Three entries use the three-layer anchor mask; any other count uses
        the two-layer (tiny) mask.
    :param anchors: anchors, indexed with a list of row indices
        (presumably a NumPy array of (width, height) pairs; confirm with caller).
    :param num_classes: number of classes.
    :param image_shape: original image size, forwarded to
        ``yolo_boxes_and_scores``.
    :param max_boxes: maximum number of boxes kept per class by NMS.
    :param score_threshold: boxes with a score below this value are dropped.
    :param iou_threshold: IoU threshold handed to non-max suppression.
    :return: ``(boxes, scores, classes)`` tensors concatenated over all classes.
    """
    num_layers = len(yolo_outputs)
    # Anchor rows used by each output layer.  The mask used to be hard-coded
    # for two layers, which raised IndexError with a three-output model.
    # NOTE(review): the tiny mask [1, 2, 3] never selects anchor 0; kept
    # unchanged from the original code.
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]
    # Network input size is derived as 32x the first output's grid size.
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []
    for layer_idx in range(num_layers):
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[layer_idx],
                                                    anchors[anchor_mask[layer_idx]],
                                                    num_classes, input_shape, image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    # Stack the per-layer results along the first axis.
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)
    # Boolean mask keeping only scores at or above the threshold.
    mask = box_scores >= score_threshold
    # Maximum number of boxes per class, as an int32 constant.
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(
            class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        # Class id tensor with one entry per surviving box.
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_

处理网络每一层的box，score

def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
    """Turn one output layer's raw features into flat box and score tensors.

    :param feats: raw output of a single detection layer.
    :param anchors: anchors assigned to this layer.
    :param num_classes: number of classes.
    :param input_shape: network input size, forwarded to ``yolo_head`` and
        ``yolo_correct_boxes``.
    :param image_shape: original image size, forwarded to
        ``yolo_correct_boxes``.
    :return: ``(boxes, box_scores)`` reshaped to ``[-1, 4]`` and
        ``[-1, num_classes]`` respectively.
    """
    # Decode centre, size, objectness and class probabilities from the layer.
    xy, wh, objectness, class_probs = yolo_head(feats, anchors, num_classes, input_shape)
    corrected = yolo_correct_boxes(xy, wh, input_shape, image_shape)
    flat_boxes = K.reshape(corrected, [-1, 4])
    # Final score = objectness * per-class probability.
    flat_scores = K.reshape(objectness * class_probs, [-1, num_classes])
    return flat_boxes, flat_scores

初步得到x, y, w, h,置信度，类别

def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    :param feats: raw layer output; its axes 1 and 2 are taken as the grid
        height and width, and it is reshaped to
        ``[-1, grid_h, grid_w, num_anchors, num_classes + 5]``.
    :param anchors: anchors for this layer, reshaped to
        ``[1, 1, 1, num_anchors, 2]``.
    :param num_classes: number of classes.
    :param input_shape: network input size used to normalise box sizes.
    :param calc_loss: when truthy, return the values needed by the loss
        instead of the inference outputs.
    :return: ``(grid, feats, box_xy, box_wh)`` if ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    # Build a coordinate grid: grid_y holds the row index and grid_x the
    # column index of every cell, each shaped (height, width, 1, 1).
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    # Joined on the last axis -> (height, width, 1, 2), ordered (x, y).
    grid = K.concatenate([grid_x, grid_y])
    # Cast to the dtype of the features so it can be added to them.
    grid = K.cast(grid, K.dtype(feats))
    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # Centres: sigmoid offset plus cell index, divided by the grid (w, h).
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    # Sizes: exp of the prediction times the anchor, divided by the input (w, h).
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs

将得到的box坐标进行转换

def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    """Map boxes from the letterboxed network input back onto the image.

    :param box_xy: box centres, last axis ordered (x, y).
    :param box_wh: box sizes, last axis ordered (w, h).
    :param input_shape: network input size.
    :param image_shape: original image size.
    :return: boxes whose last axis is ``(y_min, x_min, y_max, x_max)``,
        multiplied by ``image_shape``.
    """
    # Reverse the last axis: (x, y) -> (y, x) and (w, h) -> (h, w).
    centers_yx = box_xy[..., ::-1]
    sizes_hw = box_wh[..., ::-1]

    float_dtype = K.dtype(centers_yx)
    input_shape = K.cast(input_shape, float_dtype)
    image_shape = K.cast(image_shape, float_dtype)

    # Size of the resized image inside the input, using one common ratio.
    resized_shape = K.round(image_shape * K.min(input_shape / image_shape))
    # Relative padding on each side, and the factor that undoes the padding.
    offset = (input_shape - resized_shape) / 2. / input_shape
    scale = input_shape / resized_shape
    centers_yx = (centers_yx - offset) * scale
    sizes_hw *= scale

    half_hw = sizes_hw / 2.
    mins_yx = centers_yx - half_hw
    maxes_yx = centers_yx + half_hw
    # Column order: y_min, x_min, y_max, x_max.
    boxes = K.concatenate([mins_yx[..., 0:2], maxes_yx[..., 0:2]])

    # Scale boxes back to original image shape.
    boxes *= K.concatenate([image_shape, image_shape])
    return boxes

detect_image

    def detect_image(self, image):
        """Detect objects in ``image``, draw the results on it and return it.

        The image is letterboxed to the model input size, run through the
        session, and, when more than one box comes back, filtered again by
        ``self.nms``.  Every remaining box is drawn with a
        "<class> <score>" label.  The input shape, box counts and elapsed
        time are printed along the way.

        :param image: PIL-style image; it is drawn on in place.
        :return: the same ``image`` object, annotated.
        """
        started_at = timer()

        def to_pixel(value):
            # Round half up to an int32 pixel value.
            return np.floor(value + 0.5).astype('int32')

        # Pick the letterbox target size: the fixed model size if configured,
        # otherwise the image size rounded down to multiples of 32.
        if self.model_image_size != (None, None):
            assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
            target_size = tuple(reversed(self.model_image_size))
        else:
            target_size = (image.width - (image.width % 32),
                           image.height - (image.height % 32))
        boxed_image = letterbox_image(image, target_size)
        pixels = np.array(boxed_image, dtype='float32')

        print(pixels.shape)
        pixels /= 255.
        batch = np.expand_dims(pixels, 0)  # Add batch dimension.

        img_w, img_h = image.size[0], image.size[1]
        fetches = [self.boxes, self.scores, self.classes]
        feeds = {
            self.yolo_model.input: batch,
            self.input_image_shape: [img_h, img_w],
            K.learning_phase(): 0
        }
        out_boxes, out_scores, out_classes = self.sess.run(fetches, feed_dict=feeds)

        # With more than one detection, run the extra NMS pass; its rows are
        # split into 4 box columns and the remaining score column(s).
        if len(out_boxes) > 1:
            merged = np.array(self.nms(out_boxes, out_scores))
            out_boxes = merged[:, :4]
            out_scores = np.ravel(merged[:, 4:])
        w = out_scores.shape[0]

        print("w:{}".format(w))
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                                  size=to_pixel(3e-2 * img_h))
        thickness = (img_w + img_h) // 300

        # Every box is labelled with class index 0; out_classes is not used.
        c = 0
        for idx in range(w):
            predicted_class = self.class_names[c]
            box = out_boxes[idx]
            score = out_scores[idx]

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)

            # Round the box to pixels and clamp it to the image bounds.
            top, left, bottom, right = box
            top = max(0, to_pixel(top))
            left = max(0, to_pixel(left))
            bottom = min(img_h, to_pixel(bottom))
            right = min(img_w, to_pixel(right))
            print(label, (left, top), (right, bottom))

            # Put the label above the box when it fits, otherwise just inside.
            label_top = top - label_size[1] if top - label_size[1] >= 0 else top + 1
            text_origin = np.array([left, label_top])

            # My kingdom for a good redistributable image drawing library.
            # A thick border is emulated with nested one-pixel rectangles.
            for inset in range(thickness):
                draw.rectangle(
                    [left + inset, top + inset, right - inset, bottom - inset]
                )
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw

        print(timer() - started_at)
        return image