本项目是基于R-RetinaNet的检测网络
RetinaNet由一个主干网络和两个子网络构成:主干网络采用FPN,是一个现成的神经网络,负责从输入图像上计算出卷积特征图;第一个子网络对主干网络的输出进行目标分类,第二个子网络负责边界框回归。
为了将项目部署到AidLux平台,本项目需要先完成前置的模型转换工作,采取的转换方案为:pt → onnx → tflite。
部分实时监测代码如下:
def decoder(ims, anchors, cls_score, bbox_pred, thresh=0.6, nms_thresh=0.2, test_conf=None):
    """Decode raw network outputs into score-filtered, NMS-filtered detections.

    Only the first batch element (index 0) is decoded.

    Args:
        ims: Input image batch; only forwarded to ``clip_boxes``.
        anchors: Anchor array indexed as ``[batch, anchor, :]``.
        cls_score: Class scores indexed as ``[batch, anchor, class]``.
        bbox_pred: Box regression output handed to ``BoxCoder().decode``
            together with ``anchors`` (mode ``'xywht'``).
        thresh: Minimum best-class score an anchor needs to be kept.
        nms_thresh: Threshold forwarded to ``nms``.
        test_conf: When not None, replaces ``thresh``.

    Returns:
        A list ``[scores, classes, boxes]``:
        ``scores`` is the best class score of each kept detection,
        ``classes`` is the index of that class, and ``boxes`` is the decoded
        box concatenated with its source anchor along axis 1.
        When no anchor reaches the threshold, zero-filled placeholders of
        shapes ``(1,)``, ``(1,)`` and ``(1, 5)`` are returned instead.
    """
    if test_conf is not None:
        thresh = test_conf

    bboxes = BoxCoder().decode(anchors, bbox_pred, mode='xywht')
    bboxes = clip_boxes(bboxes, ims)

    # Best score over the class axis; the axis is kept so the result can be
    # concatenated with the boxes for NMS below.
    scores = cls_score.max(2, keepdims=True)
    keep = (scores >= thresh)[0, :, 0]
    if keep.sum() == 0:
        # The shape must be passed as a tuple: np.zeros(1, 5) would treat 5
        # as the dtype argument and raise TypeError.
        return [np.zeros(1), np.zeros(1), np.zeros((1, 5))]

    scores = scores[:, keep, :]
    anchors = anchors[:, keep, :]
    cls_score = cls_score[:, keep, :]
    bboxes = bboxes[:, keep, :]

    # NMS on rows of [box..., score] for the first batch element.
    anchors_nms_idx = nms(np.concatenate([bboxes, scores], axis=2)[0, :, :], nms_thresh)
    kept_cls_score = cls_score[0, anchors_nms_idx, :]
    nms_scores = kept_cls_score.max(axis=1)
    nms_class = kept_cls_score.argmax(axis=1)
    output_boxes = np.concatenate(
        [bboxes[0, anchors_nms_idx, :], anchors[0, anchors_nms_idx, :]],
        axis=1,
    )
    return [nms_scores, nms_class, output_boxes]
def process_img(img, target_size=640, max_size=2000, multiple=32, keep_ratio=True, NCHW=True, ToTensor=True):
    """Preprocess a BGR image into a normalized network input.

    The image is resized so that its shorter side approaches ``target_size``
    (capped so the longer side does not exceed ``max_size``), with both sides
    rounded down to a multiple of ``multiple``. It is then converted to RGB,
    scaled to [0, 1] and normalized with per-channel mean/std.

    Args:
        img: Image array in BGR channel order, indexed ``[row, col, channel]``.
        target_size: Desired length of the shorter side after resizing.
        max_size: Upper bound for the longer side after resizing.
        multiple: Both output sides are rounded down to a multiple of this.
        keep_ratio: Only ``True`` is implemented; see Returns.
        NCHW: If True the result is laid out as (1, C, H, W), otherwise
            (1, H, W, C).
        ToTensor: If True the result is converted with ``torch.from_numpy``.

    Returns:
        ``(im, im_scales)`` where ``im`` is the float32 batch of one image and
        ``im_scales`` is ``[scale_x, scale_y, scale_x, scale_y]``.
        Returns ``None`` when ``keep_ratio`` is False (not implemented).
    """
    im_shape = img.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    if not keep_ratio:
        return None

    # Resize while keeping the aspect ratio (up to rounding to `multiple`).
    im_scale = float(target_size) / float(im_size_min)
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im_scale_x = np.floor(img.shape[1] * im_scale / multiple) * multiple / img.shape[1]
    im_scale_y = np.floor(img.shape[0] * im_scale / multiple) * multiple / img.shape[0]
    image_resized = cv2.resize(img, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=cv2.INTER_LINEAR)
    im_scales = np.array([im_scale_x, im_scale_y, im_scale_x, im_scale_y])

    # Convert to RGB *before* normalizing: the mean/std constants below are in
    # RGB order, so applying them to a BGR image would mismatch the channels.
    im = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)
    im = (im / 255.0).astype(np.float32)

    PIXEL_MEANS = (0.485, 0.456, 0.406)  # per-channel mean, RGB order
    PIXEL_STDS = (0.229, 0.224, 0.225)  # per-channel std, RGB order
    im -= np.array(PIXEL_MEANS)
    im /= np.array(PIXEL_STDS)

    if NCHW:
        # HWC -> CHW; leave as HWC for runtimes that expect NHWC input.
        im = np.transpose(im, (2, 0, 1)).astype(np.float32)
    im = im[np.newaxis, ...]
    if ToTensor:
        im = torch.from_numpy(im)
    return im, im_scales
if __name__ == "__main__":
    # End-to-end demo: run the R-RetinaNet tflite model on one local image,
    # draw the detected rotated boxes, then rotate the first detection to
    # horizontal and extract / binarize / erode its ROI.

    # Read the local image.
    image_path = "/home/R-RetinaNet/samples/000001.jpg"
    cap = cvs.VideoCapture(image_path)
    img = cap.read()
    im, im_scales = process_img(img, NCHW=False, ToTensor=False)  # im: NHWC

    # Input/output buffer sizes; the trailing * 4 is the float32 item size.
    in_shape = [1 * 640 * 800 * 3 * 4]  # 1 x 640 x 800 x 3, float32
    # 53325 rows of 8 float32 values; below, columns 0:5 are used as the box
    # regression and columns 5:8 as the class scores.
    out_shape = [1 * 53325 * 8 * 4]
    # out_shape = [1 * 55425 * 8 * 4]  # alternative row count kept from the original

    # Initialize AidLite.
    aidlite = aidlite_gpu.aidlite()

    # Load the R-RetinaNet model.
    tflite_model = '/home/R-RetinaNet/models/r-retinanet.tflite'
    aidlite.ANNModel(tflite_model, in_shape, out_shape, 4, -1)  # Infer on -1: cpu, 0: gpu, 1: mixed, 2: dsp

    # Set the input.
    # NOTE(review): width/height are hard-coded to 800 x 640 — confirm they
    # match the array produced by process_img for the given image.
    aidlite.setInput_Float32(im, 800, 640)

    # Run inference.
    aidlite.invoke()

    # Fetch the output and reshape the flat buffer to (1, rows, 8).
    preds = aidlite.getOutput_Float32(0)
    preds = preds.reshape(1, 8, preds.shape[0] // 8)
    output = np.transpose(preds, (0, 2, 1))

    # Build the anchors from the NCHW view of the input.
    im_anchor = np.transpose(im, (0, 3, 1, 2)).astype(np.float32)
    anchor_generator = Anchors(ratios=np.array([0.2, 0.5, 1, 2, 5]))
    original_anchors = anchor_generator(im_anchor)  # (bs, num_all_anchors, 5)

    # Decode the network output.
    decode_output = decoder(im_anchor, original_anchors, output[..., 5:8], output[..., 0:5], thresh=0.5, nms_thresh=0.2, test_conf=None)
    for i, item in enumerate(decode_output):
        print("dim({}), shape: {}".format(i, item.shape))

    # Assemble detections as rows of [class, score, box...].
    scores = decode_output[0].reshape(-1, 1)
    classes = decode_output[1].reshape(-1, 1)
    boxes = decode_output[2]
    # Map coordinates back to the original image size.
    boxes[:, :4] = boxes[:, :4] / im_scales
    if boxes.shape[1] > 5:
        boxes[:, 5:9] = boxes[:, 5:9] / im_scales
    dets = np.concatenate([classes, scores, boxes], axis=1)

    # Filter by class: keep only class indices greater than 0.
    keep = np.where(classes > 0)[0]
    dets = dets[keep, :]

    # Convert coordinates ('xyxya' -> 'xyxyxyxy').
    quads = sort_corners(rbox_2_quad(dets[:, 2:]))

    # Draw each detection as a closed quadrilateral.
    for k in range(dets.shape[0]):
        corners = [(int(quads[k, 2 * j]), int(quads[k, 2 * j + 1])) for j in range(4)]
        for j in range(4):
            cv2.line(img, corners[j], corners[(j + 1) % 4], (0, 255, 0), 3)
    cv2.imwrite("/home/R-RetinaNet/samples/00_detected_image.jpg", img)

    # Everything below works on the first detection; stop cleanly instead of
    # failing with an IndexError when nothing was detected.
    if dets.shape[0] == 0:
        raise SystemExit("No detections left after filtering; skipping rotation and ROI post-processing.")

    # Rotate the insulator to horizontal, around the box center.
    t_center = ((dets[0, 4] + dets[0, 2]) / 2, (dets[0, 5] + dets[0, 3]) / 2)
    t_angle = dets[0, 6]
    t_height, t_width = img.shape[:2]
    rotate_matrix = cv2.getRotationMatrix2D(center=t_center, angle=t_angle, scale=1)
    rotated_image = cv2.warpAffine(src=img, M=rotate_matrix, dsize=(t_width, t_height))

    # Transform the corner coordinates with the same rotation.
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    new_coord = np.zeros((dets.shape[0], 4, 2), dtype=np.float64)

    # When several insulators are present, only the first one is post-processed
    # as an example.
    k = 0
    for j in range(4):
        point = np.array([[quads[k, 2 * j]], [quads[k, 2 * j + 1]], [1]])
        new_coord[k, j] = np.squeeze(np.dot(rotate_matrix, point))

    # Axis-aligned bounding rectangle of the rotated corners.
    (x, y, w, h) = get_std_rect(new_coord[k])

    # Extract the ROI image.
    roi_image = rotated_image[y:(y + h), x:(x + w)]

    # Grayscale.
    gray_image = cv2.cvtColor(roi_image, cv2.COLOR_BGR2GRAY)

    # Binarize.
    retval, binary_image = cv2.threshold(gray_image, 150, 255, cv2.THRESH_BINARY)

    # 5x5 all-ones structuring element.
    kernel = np.ones((5, 5), np.uint8)

    # Erode, one iteration.
    erode_image = cv2.erode(binary_image, kernel, iterations=1)

    # Save intermediate results locally for inspection.
    cv2.imwrite("/home/R-RetinaNet/samples/01_rotated_image.jpg", rotated_image)
    cv2.imwrite("/home/R-RetinaNet/samples/02_roi_image.jpg", roi_image)
    cv2.imwrite("/home/R-RetinaNet/samples/03_binary_image.jpg", binary_image)
    cv2.imwrite("/home/R-RetinaNet/samples/04_erode_image.jpg", erode_image)
实现视频以及照片展示:
aid1
照片演示
注:照片及视频中的识别框不完全准确,原因是输入图像宽高的处理还存在一些小问题。