基于 MindSpore 的 YOLO 视频检测

from random import randint
from PIL import Image
import os
import numpy as np
import mindspore as ms
from src.yolo import YOLOV5s
import cv2
import matplotlib.pyplot as plt


def _iou_xywh(box, others):
    """Return the IoU of one box against every row of ``others``.

    Boxes are read as (x, y, w, h, ...) with (x, y) the top-left corner, the
    same layout ``getInter``/``getIou`` in this file operate on. Pairs whose
    union area is not positive get an IoU of 0 instead of dividing by zero.
    """
    left = np.maximum(box[0], others[:, 0])
    top = np.maximum(box[1], others[:, 1])
    right = np.minimum(box[0] + box[2], others[:, 0] + others[:, 2])
    bottom = np.minimum(box[1] + box[3], others[:, 1] + others[:, 3])
    inter = np.clip(right - left, 0, None) * np.clip(bottom - top, 0, None)
    union = box[2] * box[3] + others[:, 2] * others[:, 3] - inter
    iou = np.zeros(len(others), dtype=float)
    np.divide(inter, union, out=iou, where=union > 0)
    return iou


def nms(pred, conf_thres, iou_thres):
    """Per-class non-maximum suppression.

    Args:
        pred: array whose rows are (x, y, w, h, confidence, class scores...).
            Column 4 is compared against ``conf_thres`` and the arg-max of
            columns 5+ is taken as the class id.
        conf_thres: rows whose confidence is not above this value are dropped.
        iou_thres: a candidate is suppressed when its IoU with an already
            kept box of the same class is above this value.

    Returns:
        A list of 1-D arrays (x, y, w, h, confidence, class_id). Boxes are
        grouped by ascending class id and, within a class, ordered by
        descending confidence. An empty list is returned when nothing passes
        the confidence threshold. ``pred`` itself is not modified.
    """
    pred = np.asarray(pred)
    # Boolean-mask indexing copies, so later writes never touch the caller's array.
    candidates = pred[pred[..., 4] > conf_thres]
    if len(candidates) == 0:
        return []

    class_ids = np.argmax(candidates[:, 5:], axis=1)
    output_box = []
    for class_id in np.unique(class_ids):
        cls_box = candidates[class_ids == class_id][:, :6].copy()
        cls_box[:, 5] = class_id  # column 5 now carries the class id, not a score
        # Sort once by descending confidence so that row 0 is always the best
        # remaining candidate; the previous code removed row 0 of an unsorted
        # array, which could discard a valid box.
        cls_box = cls_box[np.argsort(-cls_box[:, 4], kind="stable")]
        while len(cls_box) > 0:
            best = cls_box[0]
            output_box.append(best)
            rest = cls_box[1:]
            if len(rest) == 0:
                break
            # Keep only candidates that do not overlap the kept box too much.
            cls_box = rest[_iou_xywh(best, rest) <= iou_thres]
    return output_box


# Intersection over union (IoU) of two boxes
def getIou(box1, box2, inter_area):
    """Return the intersection-over-union of two (x, y, w, h, ...) boxes.

    Args:
        box1: indexable whose items 2 and 3 are width and height.
        box2: indexable whose items 2 and 3 are width and height.
        inter_area: area of the overlap between the two boxes.

    Returns:
        ``inter_area / union``, or 0.0 when the union area is not positive
        (for example two zero-sized boxes), which previously divided by zero.
    """
    box1_area = box1[2] * box1[3]
    box2_area = box2[2] * box2[3]
    union = box1_area + box2_area - inter_area
    if union <= 0:
        return 0.0
    return inter_area / union


# Intersection area of two boxes
def getInter(box1, box2):
    """Return the overlap area of two (x, y, w, h, ...) boxes.

    (x, y) is read as the top-left corner. 0 is returned when the boxes are
    separated along either axis; otherwise the overlap along each axis is the
    distance between the two middle values of the four sorted edge coordinates.
    """
    left_a, top_a = box1[0], box1[1]
    right_a, bottom_a = left_a + box1[2], top_a + box1[3]
    left_b, top_b = box2[0], box2[1]
    right_b, bottom_b = left_b + box2[2], top_b + box2[3]

    apart_in_x = left_a > right_b or right_a < left_b
    apart_in_y = top_a > bottom_b or bottom_a < top_b
    if apart_in_x or apart_in_y:
        return 0

    # After sorting the four edges of one axis, the middle pair bounds the overlap.
    _, x_low, x_high, _ = np.sort([left_a, right_a, left_b, right_b])
    _, y_low, y_high, _ = np.sort([top_a, bottom_a, top_b, bottom_b])
    return (x_high - x_low) * (y_high - y_low)


def draw(img, pred):
    """Return a copy of ``img`` with every detection in ``pred`` drawn on it.

    Args:
        img: image array; it is left untouched (the previous code drew on the
            input even though it had made a copy).
        pred: iterable of detections laid out as
            (x, y, w, h, score, class_id) with (x, y) the top-left corner.

    Returns:
        The annotated copy. With no detections it is a plain copy of ``img``.
    """
    labels = ['crack', 'crul', 'dent', 'material']
    img_ = img.copy()
    for detect in pred:
        x1 = int(detect[0])
        y1 = int(detect[1])
        x2 = int(detect[0] + detect[2])
        y2 = int(detect[1] + detect[3])
        score = detect[4]
        cls = detect[5]
        print(x1, y1, x2, y2, score, cls)
        class_id = int(cls)
        # Fall back to the numeric id when the model reports a class that has
        # no entry in the label table, instead of raising IndexError.
        if 0 <= class_id < len(labels):
            name = labels[class_id]
        else:
            name = str(class_id)
        text = name + ':' + str(score)
        cv2.rectangle(img_, (x1, y1), (x2, y2), (0, 255, 0), 1)
        cv2.putText(img_, text, (x1, y1 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)
    return img_


def load_parameters(network, filename):
    """Load the checkpoint ``filename`` into ``network``.

    Entries whose name starts with ``moments.`` are skipped, and a leading
    ``yolo_network.`` is stripped from the remaining names before the
    parameters are handed to ``ms.load_param_into_net``.
    """
    wrapper_prefix = 'yolo_network.'
    checkpoint = ms.load_checkpoint(filename)
    renamed = {}
    for name, param in checkpoint.items():
        if name.startswith('moments.'):
            continue
        if name.startswith(wrapper_prefix):
            name = name[len(wrapper_prefix):]
        renamed[name] = param
    ms.load_param_into_net(network, renamed)


def main(ckpt_file, img):
    """Run YOLOv5s on one frame and return its raw predictions in pixel units.

    Args:
        ckpt_file: path of the MindSpore checkpoint to load.
        img: frame as an H x W x C array; the channel axis is reversed before
            inference (BGR to RGB for frames read by OpenCV).

    Returns:
        A (25200, 85) array. Columns 0-3 of every row are rewritten as
        (left, top, width, height) in pixels of the original frame, clamped
        to the frame; the remaining columns are the network outputs unchanged.

    Raises:
        FileNotFoundError: if ``ckpt_file`` is not an existing file.
    """
    orig_h, orig_w = img.shape[:2]
    # NOTE(review): the network is rebuilt and the checkpoint reloaded on
    # every call; callers that process many frames pay that cost per frame.
    ms.set_context(mode=ms.GRAPH_MODE, device_target='CPU', device_id=0)
    network = YOLOV5s(is_training=False)
    if not os.path.isfile(ckpt_file):
        raise FileNotFoundError(f"{ckpt_file} is not a filename.")
    load_parameters(network, ckpt_file)
    network.set_train(False)

    input_shape = ms.Tensor(tuple([640, 640]), ms.float32)
    # The third positional argument of cv2.resize is ``dst``; the
    # interpolation flag has to be passed by keyword to take effect.
    img = cv2.resize(img, (640, 640), interpolation=cv2.INTER_LINEAR)
    img = img[:, :, ::-1].transpose((2, 0, 1))  # reverse channels, HWC -> CHW
    img = img / 255.
    img = np.expand_dims(img, axis=0)
    # Stack the four 2x2 pixel phases along the channel axis.
    image = np.concatenate((img[..., ::2, ::2], img[..., 1::2, ::2],
                            img[..., ::2, 1::2], img[..., 1::2, 1::2]), axis=1)
    image = ms.Tensor(image, dtype=ms.float32)

    output_big, output_me, output_small = network(image, input_shape)
    output_small = np.reshape(np.squeeze(output_small.asnumpy()), [19200, 85])
    output_me = np.reshape(np.squeeze(output_me.asnumpy()), [4800, 85])
    output_big = np.reshape(np.squeeze(output_big.asnumpy()), [1200, 85])
    result = np.vstack([output_small, output_me, output_big])

    # Columns 0-3 are treated as a normalised (centre x, centre y, w, h) box.
    # Convert to pixel corners and clamp both corners to the frame, so a box
    # cut by a border shrinks instead of keeping its full width/height.
    centre_x = result[:, 0] * orig_w
    centre_y = result[:, 1] * orig_h
    half_w = result[:, 2] * orig_w / 2.
    half_h = result[:, 3] * orig_h / 2.
    left = np.clip(centre_x - half_w, 0, orig_w)
    top = np.clip(centre_y - half_h, 0, orig_h)
    right = np.clip(centre_x + half_w, 0, orig_w)
    bottom = np.clip(centre_y + half_h, 0, orig_h)
    result[:, 0] = left
    result[:, 1] = top
    result[:, 2] = right - left
    result[:, 3] = bottom - top
    return result

class VideoCamera(object):
    """Read a video file frame by frame and run YOLOv5 detection on each frame."""

    def __init__(self,
                 video_path="D:/model code of github/lxy-code-0613/backend/tmp/image/1.mp4",
                 weights='D:/model code of github/lxy-code-0613/backend/output/0-1000_149000.ckpt'):
        """Open the video and remember the checkpoint path.

        Args:
            video_path: video file to read; defaults to the original
                hard-coded location, replace it with your own file.
            weights: YOLOv5 checkpoint file passed to ``main`` for each frame.
        """
        self.img_size = 640
        self.threshold = 0.4
        self.max_frame = 160
        self.video = cv2.VideoCapture(video_path)
        self.weights = weights
        ms.set_context(mode=ms.GRAPH_MODE, device_target='CPU', device_id=0)

    def __del__(self):
        # ``video`` is missing when __init__ failed before opening the capture.
        video = getattr(self, 'video', None)
        if video is not None:
            video.release()

    def get_frame(self):
        """Detect on every frame of the video and return the last one as JPEG.

        Each raw frame is written to ``tmp/image/test.jpg`` and each annotated
        frame to ``tmp/image/<n>.jpg`` with ``n`` counting from 1.

        Returns:
            JPEG-encoded bytes of the last annotated frame.

        Raises:
            RuntimeError: if no frame could be read or JPEG encoding fails.
        """
        annotated = None
        frame_no = 0
        while True:
            ret, frame = self.video.read()
            # A failed read marks the end of the stream; stop before the
            # missing frame reaches cv2.imwrite / main.
            if not ret or frame is None:
                break
            cv2.imwrite('tmp/image/test.jpg', frame)

            pred = main(self.weights, frame)
            pred = nms(pred, 0.6, 0.4)
            frame_no += 1

            annotated = draw(frame, pred)
            cv2.imwrite('tmp/image/{}.jpg'.format(frame_no), annotated)

        if annotated is None:
            raise RuntimeError('no frame could be read from the video')
        # OpenCV frames are raw arrays; motion-JPEG streaming needs them
        # encoded as JPEG first.
        ok, jpeg = cv2.imencode('.jpg', annotated)
        if not ok:
            raise RuntimeError('failed to encode the frame as JPEG')
        return jpeg.tobytes()

    def preprocess(self, frame):
        """Letterbox a BGR frame into a 640x640 float image.

        The frame is converted to RGB, resized with its aspect ratio kept,
        and centred on a black 640x640 canvas.

        Returns:
            ``(new_image, image)``: the float32 canvas scaled to [0, 1] in
            H x W x 3 layout, and the RGB ``PIL.Image`` of the input frame.
        """
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img_w, img_h = image.size
        net_h = 640
        net_w = 640

        scale = min(float(net_w) / float(img_w), float(net_h) / float(img_h))
        new_w = int(img_w * scale)
        new_h = int(img_h * scale)

        # Offsets that centre the resized image on the canvas.
        shift_x = (net_w - new_w) // 2
        shift_y = (net_h - new_h) // 2

        image_ = image.resize((new_w, new_h))
        new_image = np.zeros((net_h, net_w, 3), np.uint8)
        new_image[shift_y: new_h + shift_y, shift_x: new_w + shift_x, :] = np.array(image_)
        new_image = new_image.astype(np.float32)
        new_image = new_image / 255
        print('new_image.shape', new_image.shape)
        return new_image, image


def gen(camera):
    """Return whatever ``camera.get_frame()`` produces."""
    return camera.get_frame()



# Script entry point: build a VideoCamera with its default settings and keep
# the value returned by its get_frame() call.
if __name__ == '__main__':
    frame = gen(VideoCamera())

置信度阈值可以自行调整。上面的代码将视频按帧读取并逐帧检测;如果想把检测后的图片再按帧合成为视频,需要在读取图片时按帧序号排序,否则合成的视频会闪屏。代码如下:

video_output = cv2.VideoWriter('tmp/video_draw/result.mp4', fourcc, cap_fps, size)

        # Read the numbered frame images (1.jpg, 2.jpg, ...) from the tmp/video_draw/ directory in frame order.
        path = 'tmp/video_draw/'
        filenames = os.listdir(path)
        filenameNum = len(filenames)
        for i in range(1, 4):
            filename = path + '/' + str(i) + '.jpg'
            img = cv2.imread(filename)
            video_output.write(img)

        video_output.release()
        return video_output

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值