基于深度学习神经网络YOLOv4目标检测的手势识别系统,其能识别的手势有8种,见如下:
第一步:YOLOv4介绍
YOLOv4是一种目标检测算法,它在精度和速度之间取得了最佳的平衡。它是YOLO(You Only Look Once)系列算法的最新版本,通过将目标检测任务转化为一个回归问题,实现了实时目标检测。YOLOv4采用了一系列的调优手段,使得其在目标检测任务中表现出色。
YOLOv4的框架原理主要包括以下几个方面:
- Backbone:YOLOv4使用了CSPDarknet53作为其主干网络,该网络结构具有较强的特征提取能力。
- 训练策略:YOLOv4采用了多尺度训练和数据增强等策略来提高模型的泛化能力和检测精度。
- 推理策略:YOLOv4使用了多尺度推理和后处理技术来提高检测速度和准确性。
- 检测头训练策略:YOLOv4使用了Mosaic数据增强和CIoU损失函数等策略来提高小目标的检测精度。
- 检测头推理策略:YOLOv4沿用了YOLOv3的YOLO Head进行多尺度预测,并在推理阶段结合DIoU-NMS等后处理手段,提高了模型的检测能力。
总之,YOLOv4是一种高效准确的目标检测算法,具有较好的精度和速度表现。它在目标检测领域具有广泛的应用前景。
标注数据,YOLOv4的训练和测试步骤,各路大神都已经做了很多工作,我就不再写了,这里有几个写的比较好的博客可以参考:
【项目实践】YOLO V4万字原理详细讲解并训练自己的数据集(pytorch完整项目打包下载)-腾讯云开发者社区-腾讯云
第二步:YOLOv4网络结构
第三步:代码展示
def read_file(self):
    """Let the user pick an image, run plate detection on it, and show the
    annotated image plus the recognised character string in the UI.

    Relies on module-level globals defined elsewhere in this file:
    ``net`` (cv2 detection model), ``confThreshold``, ``nmsThreshold``,
    ``class_names`` and ``color1`` -- TODO confirm their definitions.
    Does nothing if the dialog is cancelled or the file is unreadable.
    """

    def E2C(label):
        """Translate an English class label to the Chinese character at the
        same list index. Raises ValueError for an unknown label."""
        predefined_En = ["plate","0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F","G","H","J","K","L","M","N","P","Q","R","S","T","U","V","W","X","Y","Z","Ao","CA","Er","Ga","Gn","Gg","Gu","Gi","Hi","Hu","Je","Ji","Jg","Jn","Jig","Jin","Li","Lu","Mg","Mi","Ng","Qg","Qo","Sh","Su","Wa","Xi","Xg","Xu","Yu","Yui","Ye","Yn","Zh","Za"]
        predefined_CN = ["plate","0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F","G","H","J","K","L","M","N","P","Q","R","S","T","U","V","W","X","Y","Z","澳","川","鄂","甘","赣","港","贵","桂","黑","沪","吉","冀","津","晋","京","警","辽","鲁","蒙","闽","宁","青","琼","陕","苏","皖","湘","新","学","渝","豫","粤","云","浙","藏"]
        # Position of the English label gives the matching Chinese name.
        return predefined_CN[predefined_En.index(label)]

    def cv2ImgAddText(img, text, left, top, textColor=(0, 255, 0), textSize=35):
        """Draw text (including Chinese glyphs) on an image via PIL,
        because cv2.putText cannot render non-ASCII characters."""
        if isinstance(img, np.ndarray):  # OpenCV ndarray -> PIL image
            img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(img)
        # SimSun font file must be resolvable from the working directory.
        fontStyle = ImageFont.truetype("simsun.ttc", textSize, encoding="utf-8")
        draw.text((left, top), text, textColor, font=fontStyle)
        # Convert back to an OpenCV-style numpy array.
        return cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)

    fileName_choose, filetype = QFileDialog.getOpenFileName(
        self, "选取图片文件",
        getcwd(),  # start browsing from the current working directory
        "图片(*.jpg;*.jpeg;*.png)")
    if not fileName_choose:
        return  # dialog cancelled -- nothing to do
    image = cv2.imread(fileName_choose)
    if image is None:
        return  # unreadable or non-image file
    image = image[:, :, ::-1].copy()  # BGR -> RGB for Qt display

    xs = []         # x coordinate of each detected character box
    labels_cn = []  # Chinese label for each detected character
    left1 = 0       # top-left corner of the plate box, for the text overlay
    top1 = 0
    classes, confidences, boxes = net.detect(image, confThreshold, nmsThreshold)
    for (classid, score, box) in zip(classes, confidences, boxes):
        left, top, width, height = box
        if classid == 0:
            # Class 0 is the plate itself: remember its corner and outline it.
            left1 = left
            top1 = top
            cv2.rectangle(image, box, color1, 3)
        else:
            label = class_names[classid]
            xs.append(left)
            labels_cn.append(E2C(label))

    # Order characters left-to-right by x coordinate and join them into the
    # plate string (safe for zero detections, unlike unpacking zip(*[])).
    pairs = sorted(zip(xs, labels_cn), key=lambda p: p[0])
    text = "".join(cn for _x, cn in pairs)
    self.textBrowser.setText(text)
    # Overlay the Chinese plate string just above the plate rectangle.
    image = cv2ImgAddText(image, text, left1, top1 - 20)

    height = image.shape[0]
    width = image.shape[1]
    # Wrap the RGB ndarray in a QImage: 3 bytes/pixel, stride = width * 3.
    frame = QImage(image, width, height, width * 3, QImage.Format_RGB888)
    pix = QPixmap.fromImage(frame)
    self.item = QGraphicsPixmapItem(pix)
    self.scene = QGraphicsScene()  # fresh scene for the new image
    self.scene.addItem(self.item)
    self.graphicsView.setScene(self.scene)
def read_voc(self):
    """Let the user pick a video file and start timer-driven frame
    processing via self.voc.

    Does nothing if the dialog is cancelled. Ensures the timeout signal is
    connected exactly once: the original reconnected on every call, so
    opening a second video made ``voc`` fire multiple times per tick.
    """
    videoName, _ = QFileDialog.getOpenFileName(
        self, "Open", "", "*.mp4;;*.avi;;All Files(*)")
    if not videoName:
        return  # dialog cancelled -- keep current state
    self.cap = cv2.VideoCapture(videoName)
    # Drop any previous connection before reconnecting; PyQt raises
    # TypeError when the signal is not currently connected to the slot.
    try:
        self.timer_camera.timeout.disconnect(self.voc)
    except TypeError:
        pass  # not connected yet -- first call
    self.timer_camera.timeout.connect(self.voc)
    # Start the timer only after the slot is wired up.
    self.timer_camera.start(1)
def voc(self):
    """Per-frame callback for video detection.

    NOTE(review): the source listing is truncated here -- only the
    label-translation helper is visible; the frame-grabbing/detection part
    of this callback is missing from this excerpt of the article.
    """
    global g
    global text1

    def E2C(label):
        """Translate an English class label to its Chinese counterpart.

        The two lists are index-aligned; looking up the label's position in
        the English list yields the Chinese name at the same position.
        Raises ValueError for an unknown label.
        """
        en_names = ["plate","0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F","G","H","J","K","L","M","N","P","Q","R","S","T","U","V","W","X","Y","Z","Ao","CA","Er","Ga","Gn","Gg","Gu","Gi","Hi","Hu","Je","Ji","Jg","Jn","Jig","Jin","Li","Lu","Mg","Mi","Ng","Qg","Qo","Sh","Su","Wa","Xi","Xg","Xu","Yu","Yui","Ye","Yn","Zh","Za"]
        cn_names = ["plate","0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F","G","H","J","K","L","M","N","P","Q","R","S","T","U","V","W","X","Y","Z","澳","川","鄂","甘","赣","港","贵","桂","黑","沪","吉","冀","津","晋","京","警","辽","鲁","蒙","闽","宁","青","琼","陕","苏","皖","湘","新","学","渝","豫","粤","云","浙","藏"]
        return cn_names[en_names.index(label)]
第四步:运行
运行界面:
识别效果:
第五步:整个工程的内容(包括训练代码和数据)
代码的下载路径(新窗口打开链接):基于深度学习神经网络YOLOv4目标检测的手势识别系统
有问题可以私信或者留言,有问必答