一、概述
本人是一名刚刚入门人工智能领域的初学者,为了检验自己的学习成果和记录自己的学习过程,特写下这篇笔记。欢迎各位大佬们批评指正。如有侵权请联系本人删除。
项目环境:
python==3.8.19
numpy==1.24.4
opencv-python==4.10.0.84
opencv-contrib-python==4.10.0.84
paddleocr==2.8.1
pillow==10.4.0
paddlepaddle==2.6.1
torch==1.10.1+cu102
pandas==2.0.3
二、基于opencv的检测与识别
大体上的思路是:我们读入一张图片,经过灰度转化,双边滤波或者高斯滤波,然后进行边缘检测。由于车牌的形状属于长方形(以蓝底白字为例),经过上述操作后可以将车牌的边缘检测出来。接着取整张图片中面积最大的前十个轮廓,再遍历这10个轮廓,通过颜色(HSV)判断,找出与车牌颜色(蓝色)最相近的那一个。最后用PaddleOCR对车牌区域进行文字识别。
"""
author: XiaoShu
date: 2024-08-24
"""
import cv2
import numpy as np
from paddleocr import PaddleOCR
def reg_area_color(image):
    """Classify the dominant hue of an image region.

    Used to decide whether a candidate contour region looks like a blue
    (or red) licence plate.

    Args:
        image: BGR uint8 image (the cropped candidate region).

    Returns:
        'red' if the dominant H bin is in (0, 10), 'blue' if it is in
        (100, 124), otherwise 'unknow' (original spelling kept so existing
        callers comparing against it keep working).
    """
    kernel = np.ones((5, 5), np.uint8)
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # Morphological opening removes small speckles before the histogram.
    opened = cv2.morphologyEx(hsv, cv2.MORPH_OPEN, kernel)
    # BUG FIX: calcHist expects a *list* of images; the original passed the
    # array directly. Histogram over the H channel (OpenCV hue is 0-179).
    hist = cv2.calcHist([opened], [0], None, [180], [0, 180])
    # BUG FIX: np.where returns an index *array* (ambiguous in a chained
    # comparison when several bins tie); argmax gives one scalar bin index.
    dominant_h = int(np.argmax(hist))
    if 0 < dominant_h < 10:        # hues near 0 are red
        return 'red'
    if 100 < dominant_h < 124:     # hues 100-124 are blue
        return 'blue'
    return 'unknow'
# --- Plate detection with classic OpenCV: edges -> quadrilateral contours
# --- -> colour check -> OCR. ---
img = cv2.imread('1.png')
img = cv2.resize(img, (1024, 768))
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Bilateral filter smooths noise while preserving the plate edges.
blt_img = cv2.bilateralFilter(gray_img, 13, 15, 15)
canny_img = cv2.Canny(blt_img, 30, 200)
contours, _ = cv2.findContours(canny_img.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Keep the 10 largest contours; the plate should be among them.
contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]
screenCnt = None
for cnt in contours:
    peri = cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, 0.018 * peri, True)
    # A licence plate is roughly a quadrilateral.
    if len(approx) == 4:
        x, y, w, h = cv2.boundingRect(cnt)
        crop_img = img[y: y + h, x: x + w]
        # Blue-on-white plates: accept only contours whose region is blue.
        if reg_area_color(crop_img) == 'blue':
            screenCnt = approx
            break
# BUG FIX: the original fell through to drawContours/np.where with
# screenCnt == None when no plate was found, crashing. Guard explicitly.
if screenCnt is None:
    print('no plate detected')
else:
    cv2.drawContours(img, [screenCnt], -1, (0, 0, 255), 2)
    print('yes')
    cv2.imshow('img', img)
    # Build a filled mask of the plate contour to isolate its pixels.
    mask = np.zeros(blt_img.shape, np.uint8)
    cv2.drawContours(mask, [screenCnt], 0, 255, -1)
    mask_img = cv2.bitwise_and(img, img, mask=mask)
    # Coordinates of every pixel inside the plate region.
    (x, y) = np.where(mask == 255)
    # Bounding extremes (note: np.where gives rows first, then columns).
    (topx, topy) = (np.min(x), np.min(y))
    (bottomx, bottomy) = (np.max(x), np.max(y))
    # Crop the plate from the filtered grayscale image.
    Cropped = blt_img[topx:bottomx, topy:bottomy]
    ocr = PaddleOCR(use_angle_cls=True, use_gpu=True, ocr_version='PP-OCRv3')
    text = ocr.ocr(Cropped, cls=True)
    for t in text:
        print(t[0][1])
    cv2.imshow('mask_img', Cropped)
    # BUG FIX: the original called exit() on space, making the
    # destroyAllWindows() cleanup unreachable; always clean up instead.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
效果
我发现只用opencv识别效果不是很好,我换一张图片车牌就检测不出来了,通过调相关参数,效果依然欠佳,可能我没有考虑车牌模糊或者车牌倾斜的状态(本人太菜了)。之后我就想着能不能用更好的方法将车牌检测出来。我之前用opencv中dnn模块调用yolov3,就想着先用yolo将车牌检测出来再进行识别。
三、 yolov5 + opencv检测与识别
yolov5
我是自己在网上找的图片素材,然后进行手动标注的。
训练自己的数据集
先到GitHub中下载yolov5的源码。地址:https://github.com/ultralytics/yolov5
再下载一个预训练的yolov5模型,地址:
https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt
然后将yolov5s.pt放到文件根目录下
然后在源码的目录下新建立一个文件夹
然后再到dataset目录下再建立两个文件夹
分别在images和labels文件夹中再建立两个文件夹
在终端输入 labelimg,会出现以下界面
然后进入源码中做以下操作
然后在train.py修改以下参数
也可以修改epochs, batch-size,workers等参数,看自己需求。
然后就可以开始训练了
训练完成后会在runs文件夹中生成一个best.pt
以上就是训练自己数据集步骤
车牌检测与识别
思路:先用yolov5将车牌检测出来,再进行文本识别。
图片检测,具体代码如下:
"""
author: XiaoShu
date: 2024-08-26
"""
import numpy as np
from PIL import ImageFont, ImageDraw, Image
from paddleocr import PaddleOCR
import cv2
import torch
file_path = 'D:/python_program/myself/carpai_detect/me/yolov5-6.0'  # local yolov5 repo path


def chinese_cv_text(img, xy, text, fill, font_size=40):
    """Draw text (including Chinese glyphs) onto an OpenCV image.

    OpenCV's putText cannot render Chinese characters, so the image is
    round-tripped through Pillow, drawn on, and converted back.

    Args:
        img: image as a numpy array.
        xy: (x, y) top-left anchor for the text.
        text: string to draw.
        fill: fill colour tuple passed to PIL's draw.text.
        font_size: point size of the SimSun font (default 40, matching the
            previous hard-coded value).

    Returns:
        The image with the text rendered, as a numpy array.
    """
    fontpath = "font/simsun.ttc"
    font = ImageFont.truetype(fontpath, font_size)
    img_pil = Image.fromarray(img)
    draw = ImageDraw.Draw(img_pil)
    # (removed the no-op `text = text` assignment from the original)
    draw.text(xy, text, font=font, fill=fill)
    return np.array(img_pil)
# --- Single-image pipeline: yolov5 detects the plate box, PaddleOCR reads it. ---
img2 = cv2.imread('3.png')
img = cv2.imread('3.png')[..., ::-1]  # BGR -> RGB, as expected by the yolov5 hub model
model = torch.hub.load(file_path, 'custom', path='runs/train/exp2/weights/best.pt', source='local')
result = model(img, size=640)
crops = result.crop(save=False)
# Corner coordinates of the first detected plate box.
x_min = int(crops[0]['box'][0].item())
y_min = int(crops[0]['box'][1].item())
x_max = int(crops[0]['box'][2].item())
y_max = int(crops[0]['box'][3].item())
img2 = img2[y_min:y_max, x_min:x_max]  # crop the plate so OCR sees only the characters
ocr = PaddleOCR(use_angle_cls=True, use_gpu=True, ocr_version='PP-OCRv3')
text = ocr.ocr(img2, cls=True)
Text = ()
for t in text:
    # BUG FIX: PaddleOCR yields [None] when nothing is recognized; the
    # original then crashed on Text[0] with an empty tuple. Skip empties.
    if t:
        Text = t[0][1]
if Text:
    img = chinese_cv_text(img, (x_min + 20, y_min + 120), Text[0], (0, 0, 0, 0))
cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (0, 0, 255), 2)
# NOTE(review): `img` is RGB here, so imshow displays swapped channels —
# kept as the original behaved; confirm intended before changing.
cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
视频检测,代码如下:
"""
author: XiaoShu
date: 2024-08-26
"""
import numpy as np
from PIL import ImageFont, ImageDraw, Image
from paddleocr import PaddleOCR
import cv2
import torch
file_path = 'D:/python_program/myself/carpai_detect/me/yolov5-6.0'  # root of the local yolov5 checkout

# OpenCV cannot display Chinese characters, so this helper renders the text
# with Pillow and hands the result back to OpenCV as a numpy array.
def chinese_cv_text(img, xy, text, fill):
    """Render *text* (Chinese glyphs included) onto *img* at position *xy*."""
    pil_image = Image.fromarray(img)
    typeface = ImageFont.truetype("font/simsun.ttc", 40)  # bundled SimSun font
    ImageDraw.Draw(pil_image).text(xy, text, font=typeface, fill=fill)
    return np.array(pil_image)
# --- Video pipeline: per-frame yolov5 plate detection + PaddleOCR reading. ---
cap = cv2.VideoCapture('1.mp4')
# Locally-trained yolov5 plate detector.
model = torch.hub.load(file_path, 'custom', path='runs/train/exp2/weights/best.pt', source='local')
model.conf = 0.7  # drop detections below 70% confidence
# OCR model used to read the plate characters.
ocr = PaddleOCR(use_angle_cls=True, use_gpu=True, ocr_version='PP-OCRv3')
x_min, y_min, x_max, y_max = 0, 0, 0, 0
while True:
    ret, frame = cap.read()
    # BUG FIX: the original never checked `ret`, so cvtColor crashed on a
    # None frame when the video ended. Exit the loop cleanly instead.
    if not ret:
        break
    img_cvt = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = model(img_cvt)
    # Each row: x_min, y_min, x_max, y_max, confidence, class id, class name,
    # e.g. [271.8 129.7 545.2 239.5 0.81 0 'plate'].
    results_ = results.pandas().xyxy[0].to_numpy()
    for box in results_:
        x_min, y_min, x_max, y_max = box[:4].astype(int)
        confidence = str(round(box[4] * 100, 2)) + '%'
        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 0, 255), 2)  # plate bounding box
        cv2.putText(frame, confidence, (x_min + 20, y_min - 5), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        # Crop the plate so OCR only sees the characters.
        img2 = frame[y_min:y_max, x_min:x_max]
        text = ocr.ocr(img2, cls=True)
        Text = ()
        for t in text:
            # BUG FIX: PaddleOCR yields [None] when nothing is recognized;
            # skip those so Text[0] below cannot raise on an empty tuple.
            if t:
                Text = t[0][1]
        if Text:
            frame = chinese_cv_text(frame, (x_min + 20, y_min - 70), Text[0], (0, 0, 255, 0))  # show plate number
    cv2.imshow('frame', frame)
    if cv2.waitKey(5) == ord(' '):  # space quits
        break
cap.release()
cv2.destroyAllWindows()
效果
图片:
视频效果
yolov5 + opencv车牌检测与识别