License Plate Detection (Segmentation + Recognition)

This article shows how to use a YOLOv5 model to detect license plates and then a MobileNetV2-based model to recognize the plate number, covering plate preprocessing, detection, cropping, recognition, and text decoding. It focuses on detection and recognition strategies for cluttered backgrounds.
  • Introduction
    This article explains how to implement vehicle license plate detection and segmentation, plus plate number recognition.

Implementation Steps

  1. Train a license plate detector based on YOLOv5, obtaining the weights "yolov5m.pt";
  2. Train a plate number recognizer based on MobileNetV2, obtaining the weights "crnn_rnet.pt";
    (the weights above are not shared; please train your own or find resources)
  3. Feed the image to be detected to the plate detector, which outputs a plate crop;
  4. Feed the plate crop to the plate number recognizer (encoder-like) and the plate color recognizer, each producing its own prediction;
  5. Decode the plate number prediction into text to obtain the final predicted plate number.

Note: a more complete pipeline performs plate alignment as preprocessing before recognition; interested readers can search for it or ask me (a minimal sketch follows below).
In fact, if the input image contains only a sufficiently clear and complete plate, the plate number recognizer alone is enough. But the client's images come with cluttered backgrounds, so the plate has to be detected and cropped first, then recognized. (The key code below is also ordered from simple to complex.)
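
Below is a minimal sketch of such an alignment step, assuming the four plate corners are already available (e.g. from a keypoint model; how to obtain them is not covered here, so the corners argument is hypothetical):

import cv2
import numpy as np

def align_plate(crop, corners, out_w=440, out_h=140):
    # corners: 4x2 points ordered top-left, top-right,
    # bottom-right, bottom-left (assumed given).
    src = np.asarray(corners, dtype=np.float32)
    dst = np.float32([[0, 0], [out_w, 0], [out_w, out_h], [0, out_h]])
    # Estimate the homography and warp the tilted plate
    # to a fronto-parallel 440x140 view.
    M = cv2.getPerspectiveTransform(src, dst)
    return cv2.warpPerspective(crop, M, (out_w, out_h))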

  • Global setup
import os
import numpy as np
import cv2
import random
import torch
import json

from torchvision import transforms
# The line below imports the plate number recognizer's backbone; without it you will get an error.
from crnn.crnn_rnet import CRNN_RNET

os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

# Initialize the plate number recognizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pt = "./crnn_rnet.pt"
ckpt = torch.load(pt, map_location=device)  # load the checkpoint once
config = ckpt["config"]
backbone = CRNN_RNET(
    nChannel=3,
    nHeight=config.height,
    nClass=config.maxLabel,
    nHidden=config.rnet.hidden,
).to(device)
backbone.load_state_dict(ckpt["data"])
backbone.eval()

(The key code follows directly; if anything is unclear, ask in the comments.)

Text decoding (called by the plate-info function)

def text_decode(config, data, confs, length):
    # CTC greedy decode: drop blanks (index 0) and collapse repeated indices.
    if length.numel() == 1:
        length = length[0]
        assert (
            data.numel() == length
        ), "text with length: {} does not match declared length: {}".format(
            data.numel(), length
        )
        char_list = []
        confs_list = []
        for i in range(length):
            if data[i] != 0 and (not (i > 0 and data[i - 1] == data[i])):
                if config.text[data[i] - 1] != " ":
                    char_list.append(config.text[data[i] - 1])
                    confs_list.append(confs[i].item())
        confs_list = [round(c * 100, 1) for c in confs_list]
        plateCharReliability = ", ".join(
            f"{ch}-{cf}%" for ch, cf in zip(char_list, confs_list)
        )
        return "".join(char_list), plateCharReliability
    else:
        assert (
            data.numel() == length.sum()
        ), "texts with length: {} does not match declared length: {}".format(
            data.numel(), length.sum()
        )
        texts = []
        index = 0
        for i in range(length.numel()):
            l = length[i]
            # Decode each batch item recursively over its own slice.
            texts.append(
                text_decode(
                    config,
                    data[index : index + l],
                    confs[index : index + l],
                    torch.IntTensor([l]),
                )
            )
            index += l
        return texts
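
To make the decoding rule concrete, here is a tiny self-contained example (the three-character table "ABC" is invented for illustration; index 0 is the CTC blank):

import torch

class Cfg:
    text = "ABC"  # toy table: index 1 -> 'A', 2 -> 'B', 3 -> 'C'; 0 is blank

data = torch.tensor([1, 1, 0, 2, 2, 2, 0, 3])  # per-timestep argmax indices
confs = torch.tensor([0.9, 0.8, 0.99, 0.7, 0.6, 0.8, 0.95, 0.85])
length = torch.IntTensor([8])

# Repeated indices collapse and blanks drop:
# prints ('ABC', 'A-90.0%, B-70.0%, C-85.0%')
print(text_decode(Cfg, data, confs, length))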

Getting the plate info

transform = transforms.Compose([transforms.ToTensor()])

def get_plate_info(plate_path):
    im = cv2.imread(plate_path)
    # plate_color_recognition is the plate color recognition function. It just
    # counts pixel values (simple and crude, and the code is long and ugly), so
    # it is not shown here, but ask me if you need it. A sketch of the idea
    # follows after this function.
    # vehicleColor = plate_color_recognition(im)
    im = cv2.resize(im, (config.width, config.height))
    im = transform(im)
    im = im[None].to(device)
    with torch.no_grad():
        preds = backbone(im)
        confs, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        confs = confs.transpose(1, 0).contiguous().view(-1)
        preds_size = torch.IntTensor([preds.size(0)] * im.size(0))
    sim_preds, plateCharReliability = text_decode(
        config, preds.data, confs.data, preds_size.data
    )
    # The next line exists purely so the printout looks nicer.
    sim_preds = sim_preds.replace(" ", "")
    # Print the plate number plus the confidence of each character, e.g.:
    # 闽D123456 闽-100.0%, D-100.0%, 1-100.0%, 2-100.0%, 3-100.0%, 4-100.0%, 5-100.0%, 6-100.0%
    print(sim_preds, plateCharReliability)
    return sim_preds, plateCharReliability
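
Since plate_color_recognition is not shown, here is a minimal sketch of the pixel-statistics idea the comment describes. The HSV ranges below are my own rough guesses, not the author's values; calibrate them on real crops before relying on them:

import cv2
import numpy as np

# Rough HSV ranges per plate color (assumed values, tune on your own data).
HSV_RANGES = {
    "blue":   ((100, 80, 60), (130, 255, 255)),
    "yellow": ((15, 80, 60), (40, 255, 255)),
    "green":  ((40, 60, 60), (90, 255, 255)),
}

def plate_color_recognition(im_bgr):
    # Count pixels falling inside each color's HSV range and
    # return the color with the largest count.
    hsv = cv2.cvtColor(im_bgr, cv2.COLOR_BGR2HSV)
    counts = {
        name: int(cv2.inRange(hsv, np.array(lo), np.array(hi)).sum() // 255)
        for name, (lo, hi) in HSV_RANGES.items()
    }
    return max(counts, key=counts.get)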

License plate detector (YOLOv5)

This is essentially the same as the predict.py that Ultralytics provides (run(), parse_opt(), main()).
YOLOv5 project: https://github.com/ultralytics/yolov5
predict.py: https://github.com/ultralytics/yolov5/blob/master/segment/predict.py
I modified the source; it is rough, so treat it as illustration only and consult the original source for reference.


# Imports follow YOLOv5's detect.py layout (run this inside the YOLOv5 repo).
# Note: newer YOLOv5 versions renamed scale_coords to scale_boxes.
import argparse
from pathlib import Path

from models.common import DetectMultiBackend
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
from utils.general import (Profile, check_file, check_img_size, check_imshow,
                           increment_path, non_max_suppression, print_args,
                           scale_coords, strip_optimizer)
from utils.plots import Annotator, save_one_box
from utils.torch_utils import select_device, smart_inference_mode

FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]  # project root

@smart_inference_mode()
def run(
        weights=ROOT / "yolov5m.pt",
        source=ROOT / "data/images",  # file/dir/URL/glob/screen/0(webcam)
        data=ROOT / "data/coco128.yaml",  # dataset.yaml path
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device="",  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_csv=False,  # save results in CSV format
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=[0],  # only the license plate class, passed to yolov5m
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / "../results/detect",  # save results to project/name
        name="result",  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        vid_stride=1,  # video frame-rate stride
):
    source = str(source)
    save_img = not nosave and not source.endswith(".txt")  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://"))
    webcam = (
            source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)
    )
    screenshot = source.lower().startswith("screen")
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / "labels" if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True
    )  
    # Load model
    device = select_device(device)
    # Load plate detector model
    model = DetectMultiBackend(
        weights, device=device, dnn=dnn, data=data, fp16=half
    )
    stride, names, pt = model.stride, model.names, model.pt

    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    bs = 1  # batch_size
    if webcam:
        view_img = check_imshow(warn=True)
        dataset = LoadStreams(
            source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride
        )
        bs = len(dataset)
    elif screenshot:
        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
    else:
        dataset = LoadImages(
            source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride
        )
    vid_path, vid_writer = [None] * bs, [None] * bs

    plate_path = ""
    # Run inference
    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup
    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
    for path, im, im0s, vid_cap, s in dataset:
        # The block below processes the content extracted for each frame
        with dt[0]:
            im = torch.from_numpy(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim

        # Inference
        with dt[1]:
            visualize = (
                increment_path(save_dir / Path(path).stem, mkdir=True)
                if visualize
                else False
            )
            pred = model(im, augment=augment, visualize=visualize)

        # NMS
        with dt[2]:
            pred = non_max_suppression(
                pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det
            )

        # Process predictions
        # pred holds every detection in this frame, but this loop runs only
        # once per image; the per-box loop comes later
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                # s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, "frame", 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / "labels" / p.stem) + (
                "" if dataset.mode == "image" else f"_{frame}"
            )  # im.txt
            # s += '%gx%g ' % im.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(
                im0, line_width=line_thickness, example=str(names)
            )
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    im.shape[2:], det[:, :4], im0.shape
                ).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    if c == 0:
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    
                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class; c == 0 means a license plate
                        if c == 0:
                            plate_path = save_one_box(
                                xyxy,
                                imc,
                                file=save_dir
                                        / "crops"
                                        / names[c]
                                        / f"{p.stem}.jpg",
                                BGR=True,
                            )
                            
            else:
                print("no plate")
                return 0  # early exit: no plate detected (returns 0, not a path)
                
    if save_txt or save_img:
        s = (
            f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}"
            if save_txt
            else ""
        )
        # LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(weights[0])  # update model (to fix SourceChangeWarning)
    return plate_path

def parse_opt(source):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--weights",
        nargs="+",
        type=str,
        default=ROOT / "yolov5m.pt",
        help="model path or triton URL",
    )
    parser.add_argument(
        "--source",
        type=str,
        default=ROOT / "data/images",
        help="file/dir/URL/glob/screen/0(webcam)",
    )
    parser.add_argument(
        "--data",
        type=str,
        default=ROOT / "data/coco128.yaml",
        help="(optional) dataset.yaml path",
    )
    parser.add_argument(
        "--imgsz",
        "--img",
        "--img-size",
        nargs="+",
        type=int,
        default=[640],
        help="inference size h,w",
    )
    parser.add_argument(
        "--conf-thres", type=float, default=0.3, help="confidence threshold"
    )
    parser.add_argument(
        "--iou-thres", type=float, default=0.45, help="NMS IoU threshold"
    )
    parser.add_argument(
        "--max-det", type=int, default=1000, help="maximum detections per image"
    )

    parser.add_argument(
        "--device", default="0", help="cuda device, i.e. 0 or 0,1,2,3 or cpu"
    )
    parser.add_argument("--view-img", action="store_true", help="show results")
    parser.add_argument("--save-txt", action="store_true", help="save results to *.txt")
    parser.add_argument(
        "--save-csv", action="store_true", help="save results in CSV format"
    )
    parser.add_argument(
        "--save-conf", action="store_true", help="save confidences in --save-txt labels"
    )
    parser.add_argument(
        "--save-crop", action="store_true", help="save cropped prediction boxes"
    )
    parser.add_argument(
        "--nosave", action="store_true", help="do not save images/videos"
    )
    parser.add_argument(
        "--classes",
        nargs="+",
        type=int,
        default=[0],
        help="filter by class: --classes 0, or --classes 0 2 3",
    )

    parser.add_argument(
        "--agnostic-nms", action="store_true", help="class-agnostic NMS"
    )
    parser.add_argument("--augment", action="store_true", help="augmented inference")
    parser.add_argument("--visualize", action="store_true", help="visualize features")
    parser.add_argument("--update", action="store_true", help="update all models")
    parser.add_argument(
        "--project",
        default=ROOT / "../results/interface4",
        help="save results to project/name",
    )
    parser.add_argument("--name", default="exp", help="save results to project/name")
    parser.add_argument(
        "--exist-ok",
        action="store_true",
        help="existing project/name ok, do not increment",
    )
    parser.add_argument(
        "--line-thickness", default=3, type=int, help="bounding box thickness (pixels)"
    )
    parser.add_argument(
        "--hide-labels", default=False, action="store_true", help="hide labels"
    )
    parser.add_argument(
        "--hide-conf", default=False, action="store_true", help="hide confidences"
    )
    parser.add_argument(
        "--half", action="store_true", help="use FP16 half-precision inference"
    )
    parser.add_argument(
        "--dnn", action="store_true", help="use OpenCV DNN for ONNX inference"
    )
    parser.add_argument(
        "--vid-stride", type=int, default=30, help="video frame-rate stride"
    )
    opt = parser.parse_args()
    opt.source = source
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(vars(opt))
    return opt
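
The author's main() is not shown; the wiring below is my assumption of how the two stages chain together, following the step list at the top of the article:

def main(image_path):
    # Stage 1: YOLOv5 plate detector -> saved crop path (or 0 if no plate).
    opt = parse_opt(source=image_path)
    plate_path = run(**vars(opt))
    if not plate_path:
        print("no plate detected")
        return
    # Stage 2: CRNN recognizer -> plate string + per-character confidence.
    get_plate_info(plate_path)

if __name__ == "__main__":
    main("./data/images/car.jpg")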

A Small Trick

Detect-then-recognize has one unavoidable weakness: if the plate detector is inaccurate, recognition accuracy suffers directly.
For example: 1. the cropped plate is incomplete; 2. the plate fills too much of the image and is never detected.
(I deleted the code that handles these two cases, but I can describe my approach; a sketch follows after this list.)

  1. Incomplete crops usually have accurate height but insufficient width. A standard Chinese plate measures 440 x 140 mm, an aspect ratio of about 3.14, so the crop can be widened to match that ratio.
  2. If the plate fills so much of the image that the detector misses it, skip the detector and feed the image straight to the plate recognizer.
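
A minimal sketch of the ratio fix in trick 1 (the box format and the symmetric-padding policy are my assumptions; the author's deleted code may differ):

def fix_plate_ratio(im, x1, y1, x2, y2, target_ratio=440 / 140):
    # im: full BGR image; (x1, y1, x2, y2): detector box in pixels.
    h, w = y2 - y1, x2 - x1
    if w / h < target_ratio:  # height is trusted, width falls short
        new_w = int(h * target_ratio)
        cx = (x1 + x2) // 2
        # Widen symmetrically around the box center, clamped to the image.
        x1 = max(0, cx - new_w // 2)
        x2 = min(im.shape[1], cx + new_w // 2)
    return im[y1:y2, x1:x2]

For trick 2, simply call get_plate_info() on the full image whenever run() reports no plate.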