from random import randint
from PIL import Image
import os
import numpy as np
import mindspore as ms
from src.yolo import YOLOV5s
import cv2
import matplotlib.pyplot as plt
def nms(pred, conf_thres, iou_thres):
# 置信度抑制,小于置信度阈值则删除
conf = pred[..., 4] > conf_thres
box = pred[conf == True]
# 类别获取
cls_conf = box[..., 5:]
cls = []
for i in range(len(cls_conf)):
cls.append(int(np.argmax(cls_conf[i])))
# 获取类别
total_cls = list(set(cls)) # 删除重复项,获取出现的类别标签列表,example=[0, 17]
output_box = [] # 最终输出的预测框
# 不同分类候选框置信度
for i in range(len(total_cls)):
clss = total_cls[i] # 当前类别标签
# 从所有候选框中取出当前类别对应的所有候选框
cls_box = []
for j in range(len(cls)):
if cls[j] == clss:
box[j][5] = clss
cls_box.append(box[j][:6])
cls_box = np.array(cls_box)
box_conf = cls_box[..., 4] # 取出候选框置信度
box_conf_sort = np.argsort(box_conf) # 获取排序后索引
max_conf_box = cls_box[box_conf_sort[len(box_conf) - 1]]
output_box.append(max_conf_box) # 将置信度最高的候选框输出为第一个预测框
cls_box = np.delete(cls_box, 0, 0) # 删除置信度最高的候选框
while len(cls_box) > 0:
max_conf_box = output_box[len(output_box) - 1] # 将输出预测框列表最后一个作为当前最大置信度候选框
del_index = []
for j in range(len(cls_box)):
current_box = cls_box[j] # 当前预测框
interArea = getInter(max_conf_box, current_box) # 当前预测框与最大预测框交集
iou = getIou(max_conf_box, current_box, interArea) # 计算交并比
if iou > iou_thres:
del_index.append(j) # 根据交并比确定需要移出的索引
cls_box = np.delete(cls_box, del_index, 0) # 删除此轮需要移出的候选框
if len(cls_box) > 0:
output_box.append(cls_box[0])
cls_box = np.delete(cls_box, 0, 0)
return output_box
# 计算并集
def getIou(box1, box2, inter_area):
box1_area = box1[2] * box1[3]
box2_area = box2[2] * box2[3]
union = box1_area + box2_area - inter_area
iou = inter_area / union
return iou
# 计算交集
def getInter(box1, box2):
box1_x1, box1_y1, box1_x2, box1_y2 = box1[0], box1[1], \
box1[0] + box1[2], box1[1] + box1[3]
box2_x1, box2_y1, box2_x2, box2_y2 = box2[0], box2[1], \
box2[0] + box2[2], box2[1] + box2[3]
if box1_x1 > box2_x2 or box1_x2 < box2_x1:
return 0
if box1_y1 > box2_y2 or box1_y2 < box2_y1:
return 0
x_list = [box1_x1, box1_x2, box2_x1, box2_x2]
x_list = np.sort(x_list)
x_inter = x_list[2] - x_list[1]
y_list = [box1_y1, box1_y2, box2_y1, box2_y2]
y_list = np.sort(y_list)
y_inter = y_list[2] - y_list[1]
inter = x_inter * y_inter
return inter
def draw(img, pred):
img_ = img.copy()
if len(pred):
for detect in pred:
x1 = int(detect[0])
y1 = int(detect[1])
x2 = int(detect[0] + detect[2])
y2 = int(detect[1] + detect[3])
score = detect[4]
cls = detect[5]
labels = ['crack', 'crul', 'dent', 'material' ]
print(x1, y1, x2, y2, score, cls)
img_ = cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 1)
text = labels[int(cls)] + ':' + str(score)
cv2.putText(img, text, (x1, y1 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1, )
return img_
def load_parameters(network, filename):
param_dict = ms.load_checkpoint(filename)
param_dict_new = {}
for key, values in param_dict.items():
if key.startswith('moments.'):
continue
elif key.startswith('yolo_network.'):
param_dict_new[key[13:]] = values
else:
param_dict_new[key] = values
ms.load_param_into_net(network, param_dict_new)
def main(ckpt_file, img):
orig_h, orig_w = img.shape[:2]
ms.set_context(mode=ms.GRAPH_MODE, device_target='CPU', device_id=0)
network = YOLOV5s(is_training=False)
if os.path.isfile(ckpt_file):
load_parameters(network, ckpt_file)
else:
raise FileNotFoundError(f"{ckpt_file} is not a filename.")
network.set_train(False)
input_shape = ms.Tensor(tuple([640, 640]), ms.float32)
img = cv2.resize(img, (640, 640), cv2.INTER_LINEAR)
img = img[:, :, ::-1].transpose((2, 0, 1))
img = img / 255.
img = np.expand_dims(img, axis=0)
image = np.concatenate((img[..., ::2, ::2], img[..., 1::2, ::2],
img[..., ::2, 1::2], img[..., 1::2, 1::2]), axis=1)
image = ms.Tensor(image, dtype=ms.float32)
output_big, output_me, output_small = network(image, input_shape)
output_big = output_big.asnumpy()
output_me = output_me.asnumpy()
output_small = output_small.asnumpy()
output_small = np.squeeze(output_small)
output_small = np.reshape(output_small, [19200, 85])
output_me = np.squeeze(output_me)
output_me = np.reshape(output_me, [4800, 85])
output_big = np.squeeze(output_big)
output_big = np.reshape(output_big, [1200, 85])
result = np.vstack([output_small, output_me, output_big])
for i in range(len(result)):
x = result[i][0] * orig_w
y = result[i][1] * orig_h
w = result[i][2] * orig_w
h = result[i][3] * orig_h
x_top_left = x - w / 2.
y_top_left = y - h / 2.
x_left, y_left = max(0, x_top_left), max(0, y_top_left)
wi, hi = min(orig_w, w), min(orig_h, h)
result[i][0], result[i][1], result[i][2], result[i][3] = x_left, y_left, wi, hi
return result
class VideoCamera(object):
def __init__(self):
# 通过opencv获取实时视频流
self.img_size = 640
self.threshold = 0.4
self.max_frame = 160
self.video = cv2.VideoCapture("D:/model code of github/lxy-code-0613/backend/tmp/image/1.mp4") #换成自己的视频文件
self.weights = 'D:/model code of github/lxy-code-0613/backend/output/0-1000_149000.ckpt' #yolov5权重文件
ms.set_context(mode=ms.GRAPH_MODE, device_target='CPU', device_id=0)
network = YOLOV5s(is_training=False)
def __del__(self):
self.video.release()
def get_frame(self):
ret = True
i = 0
while ret:
ret, frame = self.video.read() #读视频
# plt.imshow(frame)
# plt.show()
# im0, img = self.preprocess(frame) #转到处理函数
# print(im0.shape)
cv2.imwrite('tmp/image/test.jpg', frame)
pred = main(self.weights, frame)
pred = nms(pred, 0.6, 0.4)
i = i + 1
ret_img = draw(frame, pred)
cv2.imwrite('tmp/image/{}.jpg'.format(i), ret_img)
# 因为openv读取的图片并非jpeg格式,因此要用motion JPEG模式需要先将图片转码成jpg格式图片
ret, jpeg = cv2.imencode('.jpg', frame)
return jpeg.tobytes()
def preprocess(self, frame):
image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
img_h = image.size[1]
img_w = image.size[0]
net_h = 640
net_w = 640
scale = min(float(net_w) / float(img_w), float(net_h) / float(img_h))
new_w = int(img_w * scale)
new_h = int(img_h * scale)
shift_x = (net_w - new_w) // 2
shift_y = (net_h - new_h) // 2
shift_x_ratio = (net_w - new_w) / 2.0 / net_w
shift_y_ratio = (net_h - new_h) / 2.0 / net_h
image_ = image.resize((new_w, new_h))
new_image = np.zeros((net_h, net_w, 3), np.uint8)
new_image[shift_y: new_h + shift_y, shift_x: new_w + shift_x, :] = np.array(image_)
new_image = new_image.astype(np.float32)
new_image = new_image / 255
print('new_image.shape', new_image.shape)
# new_image = new_image.transpose(2, 0, 1).copy()
return new_image, image
def gen(camera):
frame = camera.get_frame()
return frame
if __name__ == '__main__':
frame = gen(VideoCamera())
置信度可以自己调,该代码是将视频按帧读取并测试,但是想要把图片按帧转成视频,需要读视频的时候做排序操作,不然会闪屏,代码如下:
video_output = cv2.VideoWriter('tmp/video_draw/result.mp4', fourcc, cap_fps, size)
# 这里直接读取py文件所在目录下的pics目录所有图片。
path = 'tmp/video_draw/'
filenames = os.listdir(path)
filenameNum = len(filenames)
for i in range(1, 4):
filename = path + '/' + str(i) + '.jpg'
img = cv2.imread(filename)
video_output.write(img)
video_output.release()
return video_output