基于AidLux的热成像电力巡检项目

最新推荐文章于 2024-09-13 22:01:30 发布

听见风里

最新推荐文章于 2024-09-13 22:01:30 发布

阅读量136

点赞数 3

文章标签：深度学习、计算机视觉、神经网络

本文链接：https://blog.csdn.net/qq_53981430/article/details/130390727

版权

该文介绍了一个基于R-RetinaNet的检测网络项目，使用FPN作为主网络，通过pt-onnx-tflite模型转换在AidLux平台上部署。文中展示了图像预处理、目标分类和边界框回归的步骤，包括NMS算法用于处理检测结果。代码示例显示了如何读取图像、进行推理并进行后处理，如坐标转换和图像处理操作。

摘要由CSDN通过智能技术生成

本项目是基于R-RetinaNet的检测网络

RetinaNet由一个主网络和两个子网络构成。主网络采用FPN，它是一个现成的神经网络，负责从输入图像上计算出卷积特征图；第一个子网络对主网络的输出进行目标分类，第二个子网络负责边界框回归。

为了将项目部署到AidLux平台，需要先完成前置的模型转换工作，采取的方案为：pt → onnx → tflite。

部分实时监测代码如下：

def decoder(ims, anchors, cls_score, bbox_pred, thresh=0.6, nms_thresh=0.2, test_conf=None):
    """Decode raw network outputs into score-filtered, NMS-suppressed detections.

    Only the first batch element (index 0) is processed.

    Args:
        ims: Input image batch; forwarded to ``clip_boxes`` to bound the boxes.
        anchors: Anchors indexed as ``[batch, anchor, :]``.
        cls_score: Class scores indexed as ``[batch, anchor, class]``.
        bbox_pred: Box regression output, decoded against ``anchors`` in
            ``'xywht'`` mode by ``BoxCoder``.
        thresh: Minimum best-class score an anchor needs to be kept.
        nms_thresh: Threshold forwarded to ``nms``.
        test_conf: When not ``None``, replaces ``thresh``.

    Returns:
        A list ``[scores, classes, boxes]``. Each row of ``boxes`` is the
        decoded box followed by the anchor it came from. When no anchor
        reaches the threshold, the placeholders
        ``[zeros(1), zeros(1), zeros((1, 5))]`` are returned instead.
    """
    if test_conf is not None:
        thresh = test_conf

    bboxes = BoxCoder().decode(anchors, bbox_pred, mode='xywht')
    bboxes = clip_boxes(bboxes, ims)

    # Best class score per anchor, kept 3-D so it can be concatenated with boxes.
    scores = cls_score.max(2, keepdims=True)
    keep = (scores >= thresh)[0, :, 0]
    if keep.sum() == 0:
        # The shape must be passed as a tuple: np.zeros(1, 5) would treat 5 as
        # the dtype and raise TypeError.
        return [np.zeros(1), np.zeros(1), np.zeros((1, 5))]

    scores = scores[:, keep, :]
    anchors = anchors[:, keep, :]
    cls_score = cls_score[:, keep, :]
    bboxes = bboxes[:, keep, :]

    # NMS runs on rows of [box..., score] for the first batch element.
    anchors_nms_idx = nms(np.concatenate([bboxes, scores], axis=2)[0, :, :], nms_thresh)
    kept_cls_score = cls_score[0, anchors_nms_idx, :]
    nms_scores = kept_cls_score.max(axis=1)
    nms_class = kept_cls_score.argmax(axis=1)
    output_boxes = np.concatenate(
        [bboxes[0, anchors_nms_idx, :], anchors[0, anchors_nms_idx, :]],
        axis=1,
    )
    return [nms_scores, nms_class, output_boxes]



def process_img(img, target_size=640, max_size=2000, multiple=32, keep_ratio=True, NCHW=True, ToTensor=True):
    """Resize and normalise an image for network inference.

    The short side is scaled towards ``target_size`` (capped so the long side
    does not exceed ``max_size``), and each side is then floored to a multiple
    of ``multiple``. Pixel values are scaled to [0, 1], converted from BGR to
    RGB, and normalised with the per-channel RGB mean/std below.

    Args:
        img: Image array of shape (H, W, 3); assumed to be in OpenCV BGR
            channel order, since it is converted with ``COLOR_BGR2RGB``.
        target_size: Desired length of the shorter side.
        max_size: Upper bound for the longer side after scaling.
        multiple: Output height/width are floored to a multiple of this.
        keep_ratio: Only ``True`` is implemented; ``False`` returns ``None``.
        NCHW: If true, output layout is (1, C, H, W); otherwise (1, H, W, C).
        ToTensor: If true, return a ``torch.Tensor`` instead of a NumPy array.

    Returns:
        ``(im, im_scales)`` where ``im`` is the float32 batch of one image and
        ``im_scales`` is ``[scale_x, scale_y, scale_x, scale_y]``; or ``None``
        when ``keep_ratio`` is false.
    """
    height, width = img.shape[0:2]
    if not keep_ratio:
        # Non-aspect-preserving resize is not implemented.
        return None

    short_side = min(height, width)
    long_side = max(height, width)

    scale = float(target_size) / float(short_side)
    if np.round(scale * long_side) > max_size:
        scale = float(max_size) / float(long_side)

    # Snap each output dimension down to a multiple of `multiple`.
    im_scale_x = np.floor(width * scale / multiple) * multiple / width
    im_scale_y = np.floor(height * scale / multiple) * multiple / height
    image_resized = cv2.resize(img, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=cv2.INTER_LINEAR)
    im_scales = np.array([im_scale_x, im_scale_y, im_scale_x, im_scale_y])

    im = (image_resized / 255.0).astype(np.float32)

    # Convert to RGB *before* normalising: the statistics below are in RGB
    # order, so applying them to a BGR image would swap the R and B values.
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    PIXEL_MEANS = (0.485, 0.456, 0.406)  # RGB-order channel means
    PIXEL_STDS = (0.229, 0.224, 0.225)   # RGB-order channel standard deviations
    im -= np.array(PIXEL_MEANS, dtype=np.float32)
    im /= np.array(PIXEL_STDS, dtype=np.float32)

    if NCHW:
        # HWC -> CHW; leave as HWC for runtimes that expect NHWC input.
        im = np.transpose(im, (2, 0, 1)).astype(np.float32)
    im = im[np.newaxis, ...]
    if ToTensor:
        im = torch.from_numpy(im)
    return im, im_scales

if __name__=="__main__":
    # Read a local image.
    image_path = "/home/R-RetinaNet/samples/000001.jpg"
    cap = cvs.VideoCapture(image_path)
    img = cap.read()
    im, im_scales = process_img(img, NCHW=False, ToTensor=False)  # im: NHWC

    # Input/output buffer sizes in bytes (float32 = 4 bytes per value).
    in_shape = [1 * 640 * 800 * 3 * 4]  # 640 x 800 x 3 (HWC), float32
    # 53325 rows x 8 values; the 8 values are split below into columns 0:5
    # (box regression) and 5:8 (class scores).
    out_shape = [1 * 53325 * 8 * 4]

    # Initialise AidLite.
    aidlite = aidlite_gpu.aidlite()
    # Load the R-RetinaNet model.
    tflite_model = '/home/R-RetinaNet/models/r-retinanet.tflite'
    load_status = aidlite.ANNModel(tflite_model, in_shape, out_shape, 4, -1) # Infer on -1: cpu, 0: gpu, 1: mixed, 2: dsp

    # Set the input.
    aidlite.setInput_Float32(im, 800, 640)

    # Run inference.
    aidlite.invoke()

    # Fetch the flat output and reshape it to (1, num_rows, 8).
    preds = aidlite.getOutput_Float32(0)
    preds = preds.reshape(1, 8, preds.shape[0] // 8)
    output = np.transpose(preds, (0, 2, 1))

    # Build the anchors from the NCHW view of the input.
    im_anchor = np.transpose(im, (0, 3, 1, 2)).astype(np.float32)
    anchors_list = []
    anchor_generator = Anchors(ratios = np.array([0.2, 0.5, 1, 2, 5]))
    original_anchors = anchor_generator(im_anchor)   # (bs, num_all_achors, 5)
    anchors_list.append(original_anchors)

    # Decode the network output.
    decode_output = decoder(im_anchor, anchors_list[-1], output[..., 5:8], output[..., 0:5], thresh=0.5, nms_thresh=0.2, test_conf=None)
    for i, item in enumerate(decode_output):
        print("dim({}), shape: {}".format(i, item.shape))

    # Reassemble detections as rows of [class, score, box...].
    scores = decode_output[0].reshape(-1, 1)
    classes = decode_output[1].reshape(-1, 1)
    boxes = decode_output[2]
    # Map coordinates back to the original image scale.
    boxes[:, :4] = boxes[:, :4] / im_scales
    if boxes.shape[1] > 5:
        boxes[:, 5:9] = boxes[:, 5:9] / im_scales
    dets = np.concatenate([classes, scores, boxes], axis=1)

    # Keep only detections whose class index is greater than 0.
    keep = np.where(classes > 0)[0]
    dets =  dets[keep, :]

    # Convert coordinates ('xyxya' -> 'xyxyxyxy').
    corners = sort_corners(rbox_2_quad(dets[:, 2:]))

    # Draw each detection as a closed quadrilateral and save it for inspection.
    for k in range(dets.shape[0]):
        pts = [(int(corners[k, 2 * j]), int(corners[k, 2 * j + 1])) for j in range(4)]
        for j in range(4):
            cv2.line(img, pts[j], pts[(j + 1) % 4], (0, 255, 0), 3)
    cv2.imwrite("/home/R-RetinaNet/samples/00_detected_image.jpg", img)

    # Everything below works on detection 0; without this check an empty
    # result would fail with an IndexError on dets[0, ...].
    if dets.shape[0] == 0:
        raise SystemExit("No detection left after filtering; skipping rotation and ROI extraction.")

    # Rotate the image so the first insulator is horizontal.
    t_center = ((dets[0, 4]+dets[0, 2])/2, (dets[0,5]+dets[0,3])/2)
    t_angle = dets[0, 6]
    t_height, t_width = img.shape[:2]
    rotate_matrix = cv2.getRotationMatrix2D(center=t_center, angle=t_angle, scale=1)
    rotated_image = cv2.warpAffine(src=img, M=rotate_matrix, dsize=(t_width, t_height))

    # Corner coordinates after rotation. np.float was removed in NumPy 1.24,
    # so the explicit np.float64 is used.
    new_coord = np.zeros((dets.shape[0], 4, 2), dtype=np.float64)

    # When several insulators are present, only the first is post-processed.
    k = 0
    for j in range(4):
        # Apply the 2x3 affine matrix to the homogeneous corner (x, y, 1).
        corner_h = np.array([[corners[k, 2 * j]], [corners[k, 2 * j + 1]], [1]])
        new_coord[k, j] = np.squeeze(np.dot(rotate_matrix, corner_h))

    # Axis-aligned bounding rectangle of the rotated corners.
    (x, y, w, h) = get_std_rect(new_coord[k])

    # Extract the ROI.
    roi_image = rotated_image[y:(y+h), x:(x+w)]
    # Greyscale.
    gray_image = cv2.cvtColor(roi_image, cv2.COLOR_BGR2GRAY)
    # Binarise.
    retval, binary_image = cv2.threshold(gray_image, 150, 255, cv2.THRESH_BINARY)

    # 5x5 all-ones structuring element.
    kernel = np.ones((5, 5), np.uint8)
    # Erode once.
    erode_image = cv2.erode(binary_image, kernel, iterations=1)

    # Save intermediate results locally for inspection.
    cv2.imwrite("/home/R-RetinaNet/samples/01_rotated_image.jpg", rotated_image)
    cv2.imwrite("/home/R-RetinaNet/samples/02_roi_image.jpg", roi_image)
    cv2.imwrite("/home/R-RetinaNet/samples/03_binary_image.jpg", binary_image)
    cv2.imwrite("/home/R-RetinaNet/samples/04_erode_image.jpg", erode_image)

实现视频以及照片展示：

aid1