WIN10下基于matterport的Mask RCNN的摄像头实时识别

最新推荐文章于 2024-09-14 19:18:15 发布

爱学习的榴莲头

最新推荐文章于 2024-09-14 19:18:15 发布

阅读量360

点赞数

文章标签：深度学习 python

本文链接：https://blog.csdn.net/qq_42670174/article/details/116169272

版权

哈哈新人一枚，看了许许多多大佬分享的文章，今天也打算分享一下。

大学毕业设计用到Mask RCNN，然后就在Github上找到了matterport这个团队做的项目，说实话这个团队做的Mask RCNN项目是真心不错。

项目源码地址：https://github.com/matterport/Mask_RCNN.git

我搭建Mask RCNN的环境是Win10、CUDA 9.0、cuDNN 7、Anaconda 3、tensorflow-gpu 1.6.0、Keras 2.1.6。

环境搭建参考：https://zhuanlan.zhihu.com/p/160913716

训练自己的数据集参考：https://blog.csdn.net/qq_29462849/article/details/81037343，但是我更推荐上B站看：https://www.bilibili.com/video/BV1254y1e77o

我就是看了B站的教程，我才对整个训练过程的数据结构有所了解。

识别前的准备

开始之前需要导入这些相关函数库

import os
import cv2
import numpy as np
import colorsys
import random

from mrcnn.config import Config
from mrcnn import model as modellib

在训练完后，会在整个工程目录的logs文件夹中生成以.h5为后缀的权重文件，一般选择最后一个h5文件（但具体选哪一个，要根据你训练时layers参数的设置来决定）。


def main():
    """Build the inference model, load the trained weights and start webcam detection.

    The log directory and weight file are hard-coded relative paths, so the
    script has to be launched from a working directory where they resolve.
    """
    log_dir = '../../logs/paper20210418T2319'
    weights_file = '../../logs/paper20210418T2319/mask_rcnn_paper_0025.h5'

    # Inference configuration.
    # NOTE(review): LightConfig is not defined in the visible part of this
    # file - presumably a mrcnn Config subclass; confirm it is in scope.
    inference_config = LightConfig()

    # Create the network in inference mode, pointing it at the training log directory.
    model = modellib.MaskRCNN(mode="inference",
                              config=inference_config,
                              model_dir=log_dir)

    # Resolve the weight file to an absolute path and load it, matching layers by name.
    weight_path = os.path.abspath(weights_file)
    model.load_weights(weight_path, by_name=True)

    # Run detection on the webcam stream.
    video_detection(model)


# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()

调用摄像头进行识别

def on_mouse_action(event, x, y, flags, param):
    """Mouse callback that requests the detection loop to stop on a left click.

    The signature is the one OpenCV uses for mouse callbacks; only ``event``
    is inspected. On a left-button press the module-level flag ``go_on`` is
    set to False.
    """
    global go_on
    # Ignore everything except a left-button press.
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    print("左键点击")
    go_on = False


def video_detection(model):
    """Run detection on frames from the default camera until stopped.

    Frames are read from camera index 0, passed one at a time to
    ``model.detect`` and handed to ``prediction`` for drawing and display.
    The loop ends when a frame cannot be read or when ``on_mouse_action``
    clears the module-level ``go_on`` flag (left mouse click).

    Args:
        model: Object exposing ``detect(images, verbose=0)`` that returns a
            list with one result per image; each result is indexed with the
            keys 'rois', 'masks', 'class_ids' and 'scores'.
    """
    global go_on
    # The loop condition reads go_on; initialise it here so the first
    # iteration does not fail with NameError and so a previous click does
    # not prevent a later run.
    go_on = True

    # Open the default camera.
    get_video = cv2.VideoCapture(0)
    try:
        cv2.namedWindow('video_detection')
        # A left click on the window stops the camera loop.
        cv2.setMouseCallback('video_detection', on_mouse_action)
        # prediction() displays frames in 'prediction_result', so register
        # the same callback there; otherwise only the empty window reacts.
        cv2.namedWindow('prediction_result')
        cv2.setMouseCallback('prediction_result', on_mouse_action)

        # Class names, indexed by the class ids the model returns.
        class_names = [
            'BG',
            'handle',
            'chair',
        ]

        # Grab the first frame.
        success, frame = get_video.read()
        while success and go_on:
            # NOTE(review): OpenCV delivers BGR frames; if the model was
            # trained on RGB images, pass frame[..., ::-1] to detect()
            # instead - confirm against the training pipeline.
            res = model.detect([frame], verbose=0)[0]
            # Draw masks, boxes and captions and show the result.
            prediction(frame, res['rois'], res['masks'], res['class_ids'],
                       res['scores'], class_names)
            # Grab the next frame.
            success, frame = get_video.read()
    finally:
        # Always release the camera, even if detection raised.
        get_video.release()
        # The original destroyed 'detected image', a window that is never
        # created; close the windows that actually exist instead.
        cv2.destroyAllWindows()

对识别结果进行可视化

def prediction(img, boxes, masks, class_ids, scores, class_names):
    """Draw masks, bounding boxes and captions on a frame and display it.

    Args:
        img: Image array indexed as ``[:, :, c]`` for three channels; it is
            copied, the input is not modified.
        boxes: Array with one row per instance, each row unpacked as
            ``(y1, x1, y2, x2)``. All-zero rows are skipped.
        masks: Array indexed as ``masks[:, :, i]``; pixels equal to 1 belong
            to instance ``i``.
        class_ids: Per-instance indices into ``class_names``.
        scores: Per-instance scores, or None to omit them from captions.
        class_names: Sequence of class labels.

    The annotated image is shown in the 'prediction_result' window.
    """
    # Number of instances detected in this image.
    instance_num = boxes.shape[0]
    if not instance_num:
        # Nothing detected: the unmodified frame is still displayed below.
        print('\n___未检测到实例___\n')

    # One random colour per instance.
    colors = random_colors(instance_num)

    # Work on a uint8 copy throughout. Blended values never exceed 255
    # (pixel * 0.5 + at most 127.5), so no wider type is needed and the
    # per-instance dtype round trip of the original is avoided.
    masked_img = img.astype(np.uint8)

    for i in range(instance_num):
        # Colours come back in [0, 1]; scale to half intensity for blending.
        color = tuple(channel * 255 * 0.5 for channel in colors[i])

        if not np.any(boxes[i]):
            continue

        # (x1, y1) is the top-left corner, (x2, y2) the bottom-right corner.
        # Cast to plain Python ints for the OpenCV drawing calls, consistent
        # with the explicit casts the caption placement already used.
        y1, x1, y2, x2 = (int(v) for v in boxes[i])

        # Blend the instance colour into the masked pixels, channel by channel.
        region = masks[:, :, i] == 1
        for c in range(3):
            channel = masked_img[:, :, c]
            channel[region] = channel[region] * (1 - 0.5) + color[c]

        # Bounding box.
        cv2.rectangle(masked_img, (x1, y1), (x2, y2), color, 2)

        # Caption: class name plus score when available. Test against None
        # so that a score of exactly 0 is still printed.
        label = class_names[class_ids[i]]
        score = scores[i] if scores is not None else None
        if score is not None:
            caption = "{} {:.3f}".format(label, score)
        else:
            caption = label
        cv2.putText(masked_img,
                    caption,
                    (x1, y1 - 6),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL,
                    1, (255, 255, 255))

    # Show the result; waitKey(1) lets the GUI refresh (about 1 ms wait).
    cv2.imshow('prediction_result', masked_img)
    cv2.waitKey(1)


# Helper that assigns random colours to instances.
# Taken from the original matterport project.
def random_colors(num, bright=True):
    """Return ``num`` RGB colours with evenly spaced hues, in random order.

    Args:
        num: Number of colours to generate.
        bright: Use HSV value 1.0 when true, 0.7 otherwise.

    Returns:
        A shuffled list of ``(r, g, b)`` tuples with components in [0, 1].
    """
    if bright:
        value = 1.0
    else:
        value = 0.7
    # Spread hues evenly around the colour wheel at full saturation.
    palette = [colorsys.hsv_to_rgb(idx / num, 1, value) for idx in range(num)]
    random.shuffle(palette)
    return palette

哈哈！最后用了一种比较笨的办法：每处理完一帧就调用Opencv-python的cv2.waitKey(1)刷新窗口（参数1表示等待1毫秒，而不是1秒），实际显示出来大约是1秒一帧的效果。

哈哈！可能做得不太好，只能达到一秒一帧的效果，纯属是想分享一下。

里面大部分代码是通过matterport项目的源码改写而来的，如果真想做一下Mask RCNN，最好先把这个项目的ipynb文件看懂