Next, just feed the dataset into the YOLOv5 code and train a model. For the training parameters I recommend a batch size of 16, with 6 epochs and a learning rate of 0.01. For the pretrained weights I used YOLOv5s; nothing bigger is needed, since the goal here is only to get visibly working detections.
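For reference, the run boils down to one command along these lines, assuming your dataset config lives at `data/hat.yaml` (a path I made up for illustration); the 0.01 learning rate is already YOLOv5's default `lr0` in its hyperparameter file, so it needs no extra flag:

```bash
python train.py --img 640 --batch-size 16 --epochs 6 --data data/hat.yaml --weights yolov5s.pt
```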
Once you have the model, the next step is converting it to an ONNX file; in the YOLOv5 codebase you get this by running export.py.
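The call is roughly the following; exact flag names vary a little between YOLOv5 releases (recent ones take `--include onnx`), and `runs/train/exp/weights/best.pt` is just the default location where training saves the best weights:

```bash
python export.py --weights runs/train/exp/weights/best.pt --include onnx --imgsz 640
```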
I then used the Python version of OpenVINO for model inference and deployment, so the ONNX file has to be converted to OpenVINO's IR format, i.e. a **.bin and .xml** pair.
You need to do this from the directory where OpenVINO is installed; for the exact steps I followed a Zhihu expert's tutorial.
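For the 2021.x OpenVINO releases that match the `inference_engine` API used below, the conversion is a single Model Optimizer call, sketched here with placeholder paths:

```bash
python mo.py --input_model best.onnx --output_dir .
```

This writes the `best.xml`/`best.bin` pair that the inference code loads.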
With that done, here is the code for deploying the model and running inference.
PyQt5 desktop app
import sys
from PyQt5.QtWidgets import QLabel, QPushButton, QMainWindow, QWidget, QApplication, QHBoxLayout, QVBoxLayout
from PyQt5.QtGui import QPixmap, QColor, QImage
from PyQt5.QtCore import QTimer
from openvinoDetect import main
import cv2
class Example(QWidget):
    def __init__(self, cap, dectAction):
        super().__init__()
        self.initUI()
        self.cap = cap
        self.dectAction = dectAction
    def initUI(self):
        hbox = QHBoxLayout()
        vbox = QVBoxLayout()
        self.cameraTimer = QTimer()
        self.label = QLabel()
        self.btnOpen = QPushButton("Open camera")
        self.btnOpen.clicked.connect(self.OpenCamera)
        self.cameraTimer.timeout.connect(self.ShowCamera)
        self.btnDect = QPushButton("Start detection")
        self.btnDect.clicked.connect(self.DectCamera)
        hbox.addWidget(self.btnOpen)
        hbox.addWidget(self.btnDect)
        vbox.addWidget(self.label)
        vbox.addLayout(hbox)
        self.setLayout(vbox)
        self.setGeometry(500, 300, 1000, 600)
        self.setWindowTitle("Hard hat detection")
        self.show()
    def ShowCamera(self):
        flag, image = self.cap.read()  # read one frame from the video stream
        if not flag:  # stream ended or the read failed
            self.cameraTimer.stop()
            return
        if self.dectAction:
            # run detection on the raw BGR frame; the inference code swaps channels itself
            image = main(image)
        # image = cv2.flip(image, 1)  # horizontal flip, since webcams give a mirrored view
        height, width = image.shape[:2]  # rows = height, cols = width
        image_show = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV reads BGR, QImage wants RGB
        # wrap the frame in a QImage (data, width, height, bytes per line, RGB888)
        show = QImage(image_show.data, width, height, 3 * width, QImage.Format_RGB888)
        self.label.setPixmap(QPixmap.fromImage(show))  # display the QImage in the video Label
    def OpenCamera(self):
        if self.cap.isOpened():
            self.cameraTimer.start(40)  # grab a frame every 40 ms, i.e. roughly 25 fps
            self.ShowCamera()
        else:
            print("Camera is not open")
            return None
    def DectCamera(self):
        self.dectAction = True
if __name__ == '__main__':
    cap = cv2.VideoCapture("D:/images/hatDect.mp4")  # pass 0 instead to use a real webcam
    dectAction = False
    if not cap.isOpened():
        print("Cannot open camera")
        exit()
    app = QApplication(sys.argv)
    ex = Example(cap, dectAction)
    sys.exit(app.exec_())
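A note on the environment: the inference module below uses OpenVINO's old `inference_engine` Python API together with torch/torchvision for the NMS step, so it needs an OpenVINO release that still ships that API. One combination that should work (the exact versions are my assumption; the post does not pin any) is:

```bash
pip install pyqt5 opencv-python torch torchvision openvino==2021.4.2
```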
OpenVINO inference code
from __future__ import print_function
import logging as log
import os
import pathlib
import cv2
import numpy as np
from openvino.inference_engine import IENetwork, IECore
import torch
import torchvision
import time
def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=(), max_det=300):
    """Runs Non-Maximum Suppression (NMS) on inference results

    Returns:
        list of detections, one (n,6) tensor per image [xyxy, conf, cls]
    """
    prediction = torch.from_numpy(prediction)
    if prediction.dtype is torch.float16:
        prediction = prediction.float()  # to FP32

    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    min_wh, max_wh = 2, 7680  # (pixels) minimum and maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain, process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (needs box_iou from YOLOv5's utils; disabled here)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            log.warning(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded

    return output
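# Shape note (assuming the export kept YOLOv5's default three detection layers):
# for a 640x640 input and this 2-class hat model, the network output fed into
# non_max_suppression is a (1, 25200, 7) array, where 25200 = 3 anchors x
# (80^2 + 40^2 + 20^2) grid cells across the three strides, and the 7 values
# per box are [cx, cy, w, h, objectness, p_person, p_hat].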
device = 'CPU'
input_h, input_w, input_c, input_n = (640, 640, 3, 1)
log.basicConfig(level=log.DEBUG)
# For an object detection task, put your target labels here (index = class id).
label_id_map = ["person", "hat"]
exec_net = None
def init(model_xml):
    if not os.path.isfile(model_xml):
        log.error(f'{model_xml} does not exist')
        return None
    model_bin = pathlib.Path(model_xml).with_suffix('.bin').as_posix()
    net = IENetwork(model=model_xml, weights=model_bin)
    ie = IECore()
    global exec_net
    exec_net = ie.load_network(network=net, device_name=device)

    input_blob = next(iter(net.inputs))
    n, c, h, w = net.inputs[input_blob].shape
    global input_h, input_w, input_c, input_n
    input_h, input_w, input_c, input_n = h, w, c, n
    return net
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--model_xml', type=str, default='best.xml')
parser.add_argument('--source', type=str, default='data/test')
# parse_known_args so that importing this module from the PyQt app
# doesn't choke on the GUI's own command-line arguments
opt, _ = parser.parse_known_args()

predictor = init(opt.model_xml)
def process_image(net, input_image):
    if not net or input_image is None:
        log.error('Invalid input args')
        return None
    ih, iw, _ = input_image.shape
    # Resize to the network input size, scale to [0,1] and swap BGR->RGB in one call.
    # (The original code only built the blob when the sizes differed, which left
    # `images` undefined for an already-640x640 frame.)
    images = cv2.dnn.blobFromImage(input_image, 1 / 255, (input_w, input_h), (0, 0, 0), swapRB=True, crop=False)

    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))

    start = time.time()
    res = exec_net.infer(inputs={input_blob: images})
    end = time.time()
    print('-[INFO] inference time: {:.1f}ms'.format((end - start) * 1000))

    data = res[out_blob]
    data = non_max_suppression(data, 0.25, 0.45)

    detect_objs = []
    if data[0] is None or not len(data[0]):  # no detections for this frame
        return detect_objs
    data = data[0].numpy()
    for proposal in data:
        if proposal[4] > 0:
            confidence = proposal[4]
            # map the network-space coordinates back onto the original image
            xmin = int(iw * (proposal[0] / input_w))
            ymin = int(ih * (proposal[1] / input_h))
            xmax = int(iw * (proposal[2] / input_w))
            ymax = int(ih * (proposal[3] / input_h))
            detect_objs.append((
                xmin,
                ymin,
                xmax,
                ymax,
                label_id_map[int(proposal[5])],
                float(confidence)
            ))
    return detect_objs
def plot_bboxes(image, bboxes, line_thickness=None):
    # Plot the bounding boxes on the image
    tl = line_thickness or round(0.002 * (image.shape[0] + image.shape[1]) / 2) + 1  # line/font thickness
    for (x1, y1, x2, y2, cls_id, conf) in bboxes:
        if cls_id == 'person':  # a head without a hard hat is drawn in red
            color = (0, 0, 255)
        else:  # 'hat' is drawn in green
            color = (0, 255, 0)
        c1, c2 = (x1, y1), (x2, y2)
        cv2.rectangle(image, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(cls_id, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(image, c1, c2, color, -1, cv2.LINE_AA)  # filled label background
        cv2.putText(image, '{}'.format(cls_id), (c1[0], c1[1] - 2), 0, tl / 3,
                    [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
    return image
def main(image):
    result = process_image(predictor, image)
    img = plot_bboxes(image, result)
    return img
if __name__ == '__main__':
    # Standalone test: run detection on every image under --source
    for p in os.listdir(opt.source):
        img = cv2.imread(os.path.join(opt.source, p))
        cv2.imshow('result', main(img))
        cv2.waitKey(0)
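Save this file as `openvinoDetect.py` so that the GUI's `from openvinoDetect import main` resolves. It can also be smoke-tested on its own, with the paths adjusted to wherever your converted model and test images sit:

```bash
python openvinoDetect.py --model_xml best.xml --source data/test
```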