利用SEQ2SEQ模型实现车牌识别

下面完整代码在github仓库:传送门


一、利用车牌模版和字体模型生成车牌

效果如下:
在这里插入图片描述
代码如下:

import numpy as np
import cv2, os, argparse
from glob import glob
from tqdm import tqdm

from plate_number import random_select, generate_plate_number_white, generate_plate_number_yellow_xue
from plate_number import generate_plate_number_black_gangao, generate_plate_number_black_shi, generate_plate_number_black_ling
from plate_number import generate_plate_number_blue, generate_plate_number_yellow_gua
from plate_number import letters, digits


def get_location_data(length=7, split_id=1, height=140):
    """
    Compute the bounding box of every character slot on a plate template.

    length: number of characters on the plate (7 for ordinary plates,
        8 for new-energy plates).
    split_id: index of the character that is preceded by the wide gap.
    height: template height; 140 selects the single-row layout, any other
        value selects the two-row layout.

    Returns an int32 array of shape (length, 4) whose rows are
    [x1, y1, x2, y2].
    """
    boxes = np.zeros((length, 4), dtype=np.int32)

    if height != 140:
        # Two-row plate: the first two characters sit at fixed positions
        # on the upper row.
        boxes[0, :] = [110, 15, 190, 75]
        boxes[1, :] = [250, 15, 330, 75]

        # The remaining characters fill the lower row from left to right.
        char_width, char_gap = 65, 15
        left = 27
        for idx in range(2, length):
            right = left + char_width
            boxes[idx, :] = [left, 90, right, 200]
            left = right + char_gap
        return boxes

    # Single-row plate: every character shares the same vertical extent.
    boxes[:, 1] = 25
    boxes[:, 3] = 115

    seven_chars = length == 7
    wide_gap = 34 if seven_chars else 49   # gap in front of character split_id
    char_gap = 12 if seven_chars else 9    # gap between ordinary neighbours

    right = 0
    for idx in range(length):
        if idx == 0:
            left = 15
        elif idx == split_id:
            left = right + wide_gap
        else:
            left = right + char_gap
        # On 8-character plates every character after the first is narrower.
        char_width = 43 if (length == 8 and idx > 0) else 45
        right = left + char_width
        boxes[idx, 0] = left
        boxes[idx, 2] = right

    return boxes


# Paste one character glyph onto the plate template
def copy_to_image_multi(img, font_img, bbox, bg_color, is_red):
    """
    Draw a glyph into the region `bbox` of `img` and return `img`.

    img: plate image, indexed as [row, column, channel]; modified in place.
    font_img: single-channel glyph image; resized to the bbox size.
    bbox: (x1, y1, x2, y2) target rectangle.
    bg_color: template colour name, used to pick the stroke colour.
    is_red: when true the strokes are painted [0, 0, 255].
    """
    left, top, right, bottom = bbox
    glyph = cv2.resize(font_img, (right - left, bottom - top))

    # Glyph pixels below 200 are treated as strokes.
    stroke_mask = glyph < 200

    if is_red:
        stroke_color = [0, 0, 255]
    elif 'blue' in bg_color or 'black' in bg_color:
        # Blue and black templates get white strokes.
        stroke_color = [255, 255, 255]
    else:
        stroke_color = [0, 0, 0]

    # The slice is written through directly, so `img` itself is painted.
    img[top: bottom, left: right, :][stroke_mask, :] = stroke_color
    return img


class MultiPlateGenerator:
    """
    Render synthetic licence-plate images from background templates and
    per-character glyph images.

    Glyph images are loaded once in the constructor and the character
    layouts for every supported (length, split, height) combination are
    pre-computed, so rendering a plate only pastes glyphs onto a template.
    """

    def __init__(self, adr_plate_model, adr_font):
        """
        adr_plate_model: directory holding the '<bg_color>_<height>.PNG'
            plate templates.
        adr_font: directory holding the glyph images ('*jpg').
        """
        # Directory of the plate background templates
        self.adr_plate_model = adr_plate_model
        # Directory of the character glyph images
        self.adr_font = adr_font

        # Glyph images keyed by file stem (e.g. '140_0'), pre-resized
        # whenever a target size is known for the glyph family.
        self.font_imgs = {}
        for font_filename in glob(os.path.join(adr_font, '*jpg')):
            # np.fromfile + imdecode is used instead of cv2.imread because
            # imread cannot open paths with Chinese characters on Windows.
            font_img = cv2.imdecode(np.fromfile(font_filename, dtype=np.uint8), 0)
            target_size = self._font_target_size(font_filename)
            if target_size is not None:
                font_img = cv2.resize(font_img, target_size)
            self.font_imgs[os.path.basename(font_filename).split('.')[0]] = font_img

        # Character layouts keyed by '<length>_<split_id>_<height>'
        self.location_xys = {}
        for i in [7, 8]:
            for j in [1, 2, 4]:
                for k in [140, 220]:
                    self.location_xys['{}_{}_{}'.format(i, j, k)] = \
                        get_location_data(length=i, split_id=j, height=k)

    @staticmethod
    def _font_target_size(font_filename):
        """Return the (width, height) a glyph file is pre-resized to, or None."""
        # Only the file name is inspected: matching against the full path
        # would let digits in a parent directory name (e.g. 'fonts_140/')
        # pick the wrong size for every glyph.
        name = os.path.basename(font_filename)
        if '140' in name:
            return 45, 90
        if '220' in name:
            return 65, 110
        if name.split('_')[-1].split('.')[0] in letters + digits:
            return 43, 90
        return None

    # Look up the character layout for a plate number
    def get_location_multi(self, plate_number, height=140):
        """
        Return the pre-computed character boxes for `plate_number`.

        The split index is 1 when the number contains '警', 4 when it
        contains '使' and 2 otherwise. Raises KeyError when no layout was
        pre-computed for the resulting length/split/height combination.
        """
        length = len(plate_number)
        if '警' in plate_number:
            split_id = 1
        elif '使' in plate_number:
            split_id = 4
        else:
            split_id = 2
        return self.location_xys['{}_{}_{}'.format(length, split_id, height)]

    # Draw a random plate number and derive template colour and row count
    def generate_plate_number(self):
        """Return (plate_number, bg_color, is_double) for a random plate."""
        if np.random.random() > 0.4:
            # Blue-plate number generator, 7 or 8 characters
            plate_number = generate_plate_number_blue(length=random_select([7, 8]))
        else:
            # White, yellow learner, yellow trailer, black HK/Macau,
            # black embassy and consulate number generators
            generate_plate_number_funcs = [generate_plate_number_white,
                                           generate_plate_number_yellow_xue,
                                           generate_plate_number_yellow_gua,
                                           generate_plate_number_black_gangao,
                                           generate_plate_number_black_shi,
                                           generate_plate_number_black_ling]
            plate_number = random_select(generate_plate_number_funcs)()

        chars = set(plate_number)

        # Template colour: random blue/yellow unless the number dictates one
        bg_color = random_select(['blue'] + ['yellow'])

        if len(plate_number) == 8:
            bg_color = random_select(['green_car'] * 10 + ['green_truck'])
        elif chars & {'使', '领', '港', '澳'}:
            bg_color = 'black'
        elif '警' in plate_number or plate_number[0] in letters:
            bg_color = 'white'
        elif chars & {'学', '挂'}:
            bg_color = 'yellow'

        is_double = random_select([False] + [True] * 3)

        if '使' in plate_number:
            bg_color = 'black_shi'

        if '挂' in plate_number:
            # Trailer plates are always two-row
            is_double = True
        elif chars & {'使', '领', '港', '澳', '学', '警'} \
                or len(plate_number) == 8 or bg_color == 'blue':
            # Embassy/consulate/HK/Macau/learner/police, new-energy and
            # blue plates are always single-row
            is_double = False

        # Special case: a leading letter on a single-row plate is an army plate
        if plate_number[0] in letters and not is_double:
            bg_color = 'white_army'

        return plate_number, bg_color, is_double

    def _select_font(self, plate_number, i, height):
        """Return the glyph image for character `i` of `plate_number`."""
        char = plate_number[i]
        if len(plate_number) == 8:
            # 8-character plates use the 'green_' glyph family
            return self.font_imgs['green_{}'.format(char)]
        key = '{}_{}'.format(height, char)
        if key in self.font_imgs:
            return self.font_imgs[key]
        # Two-row glyph family: separate glyphs for the upper row (first
        # two characters) and the lower row
        if i < 2:
            return self.font_imgs['220_up_{}'.format(char)]
        return self.font_imgs['220_down_{}'.format(char)]

    def _render_plate(self, plate_number, bg_color, height, enhance):
        """Paste every glyph onto the template; return (image, boxes)."""
        number_xy = self.get_location_multi(plate_number, height)

        template_path = os.path.join(self.adr_plate_model,
                                     '{}_{}.PNG'.format(bg_color, height))
        img_plate_model = cv2.imread(template_path)
        if img_plate_model is None:
            # cv2.imread signals failure by returning None; fail here with
            # the offending path instead of inside cv2.resize.
            raise FileNotFoundError(
                'cannot read plate template: {}'.format(template_path))
        img_plate_model = cv2.resize(
            img_plate_model, (440 if len(plate_number) == 7 else 480, height))

        for i in range(len(plate_number)):
            font_img = self._select_font(plate_number, i, height)

            # Decide whether this character is painted red
            if (i == 0 and plate_number[0] in letters) or plate_number[i] in ['警', '使', '领']:
                is_red = True
            elif i == 1 and plate_number[0] in letters and np.random.random(1) > 0.5:
                # second letter of army plate
                is_red = True
            else:
                is_red = False

            if enhance:
                # Randomly thin or thicken the glyph with a k x k kernel,
                # k drawn from 1..5. erode/dilate return a new array, so
                # the cached glyph is left untouched.
                k = np.random.randint(1, 6)
                kernel = np.ones((k, k), np.uint8)
                if np.random.random(1) > 0.5:
                    font_img = cv2.erode(font_img, kernel, iterations=1)
                else:
                    font_img = cv2.dilate(font_img, kernel, iterations=1)

            # Paste the glyph onto the template
            img_plate_model = copy_to_image_multi(img_plate_model, font_img,
                                                  number_xy[i, :], bg_color, is_red)

        img_plate_model = cv2.blur(img_plate_model, (3, 3))
        return img_plate_model, number_xy

    # Render a random plate image
    def generate_plate(self, enhance=False):
        """
        Render a plate with a random number.

        enhance: randomly erode or dilate each glyph before pasting.
        Returns (image, character boxes, plate_number, bg_color, is_double).
        """
        plate_number, bg_color, is_double = self.generate_plate_number()
        height = 220 if is_double else 140
        img_plate_model, number_xy = self._render_plate(
            plate_number, bg_color, height, enhance)
        return img_plate_model, number_xy, plate_number, bg_color, is_double

    def generate_plate_special(self, plate_number, bg_color, is_double, enhance=False):
        """
        Render a plate with a given number and template colour.

        :param plate_number: plate number to render
        :param bg_color: template colour, one of black / black_shi (embassy) /
            blue / green_car (new-energy car) / green_truck (new-energy truck) /
            white / white_army (army) / yellow
        :param is_double: whether to use the two-row template
        :param enhance: randomly erode or dilate each glyph before pasting
        :return: plate image
        """
        height = 220 if is_double else 140
        img_plate_model, _ = self._render_plate(
            plate_number, bg_color, height, enhance)
        return img_plate_model


def parse_args():
    """Parse the plate generator's command-line options from sys.argv.

    Returns the argparse namespace with `number` (int, how many plates to
    generate) and `save_adr` (output directory).
    """
    cli = argparse.ArgumentParser(description='中国车牌生成器')
    cli.add_argument('--number', type=int, default=50000, help='生成车牌数量')
    # 'multi_val' was used as the default output directory previously.
    cli.add_argument('--save-adr', default='blue_plate', help='车牌保存路径')
    return cli.parse_args()


def mkdir(path):
    """Create directory `path` (and missing parents) if it does not exist.

    An already existing directory is not an error. Any other failure
    (e.g. missing permissions, or a file blocking the path) raises OSError
    instead of being silently ignored, so the problem surfaces here rather
    than at the first write into the directory.
    """
    os.makedirs(path, exist_ok=True)


if __name__ == '__main__':
    args = parse_args()
    out_dir = args.save_adr
    # Generate random plates
    print('save in {}'.format(out_dir))

    # Make sure the output directory exists before anything is written
    mkdir(out_dir)

    generator = MultiPlateGenerator('plate_model', 'font_model')

    for _ in tqdm(range(args.number)):
        img, number_xy, gt_plate_number, bg_color, is_double = generator.generate_plate()
        print(np.shape(img), number_xy, gt_plate_number, bg_color, is_double)

        # Only blue plates are written to disk; remove this guard to keep
        # plates of every colour.
        if bg_color != 'blue':
            continue

        file_name = '{}_{}_{}.jpg'.format(gt_plate_number, bg_color, is_double)
        # imencode + tofile is used rather than cv2.imwrite — presumably
        # because the file name contains Chinese characters.
        cv2.imencode('.jpg', img)[1].tofile(os.path.join(out_dir, file_name))


二、读取车牌数据

       需要注意的是,大写字母 'I' 和 'O' 不允许出现在车牌号码中,因此可用的大写字母共有24个;再加上10个数字和31个省份简称,每个字符的 one-hot 编码维度为 24 + 10 + 31 = 65 维。

import os
import torch
import numpy as np
from PIL import Image
import torch.utils.data as data
from torchvision import transforms
from sklearn.preprocessing import OneHotEncoder
import cv2
from utils import StrtoLabel
from utils import trans_square

# Label dimension: 31 + 10 + 24 = 65
# Image preprocessing: convert to a tensor, then normalise each of the
# three channels with mean 0.5 and std 0.5.
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3)
data_transforms = transforms.Compose([_to_tensor, _normalize])
# The ten digits, as strings
nums = [str(digit) for digit in range(10)]

# Capital letters without 'I' and 'O' (24 letters)
upper_char = [chr(code) for code in range(ord('A'), ord('Z') + 1)
              if chr(code) not in 'IO']

# One-character province abbreviations (31 entries); the order is kept
# exactly as listed because positions may be used as label indices.
Province_symbol = list('藏川鄂甘赣贵桂黑沪吉冀津晋京'
                       '辽鲁蒙闽宁青琼陕苏皖湘新渝豫'
                       '粤云浙')


class Sampling(data.Dataset):
    """Dataset over a directory of plate images.

    The label of each image is the part of its file name before the first
    underscore; it is returned as a 7 x 65 one-hot matrix.
    """

    def __init__(self, root):
        """Collect the path and the label text of every entry in `root`."""
        self.transform = data_transforms
        entries = os.listdir(root)
        self.imgs = [os.path.join(root, entry) for entry in entries]
        self.labels = [entry.split('_')[0] for entry in entries]

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, index):
        """Return (transformed image, one-hot label) for sample `index`."""
        img = self.transform(Image.open(self.imgs[index]))

        # StrtoLabel turns the label text into per-character class indices
        # so that it can be one-hot encoded.
        class_ids = StrtoLabel(self.labels[index])

        return img, self.one_hot(class_ids)

    def one_hot(self, x):
        """Encode the first 7 class indices of `x` as a 7 x 65 one-hot array."""
        columns = [int(x[position]) for position in range(7)]
        z = np.zeros(shape=[7, 65])
        z[np.arange(7), columns] = 1
        return z


if __name__ == '__main__':
    # Smoke test: iterate the blue-plate dataset in batches of 10 and
    # print the batch shapes.
    dataset = Sampling("./blue_plate")
    loader = data.DataLoader(dataset, 10, shuffle=True)
    for step, (img, label) in enumerate(loader):
        print(step)
        print(img.shape)  # torch.Size([10, 3, 140, 440]) for a full batch
        print(label.shape)  # torch.Size([10, 7, 65]) for a full batch

三、搭建SEQ2SEQ网络模型

下面网络模型使用了双向LSTM,也可以改为单向。

代码如下:

class Encoder(nn.Module):
    """Encode a plate image into a single 256-dimensional vector.

    Every image column (3 channels x 140 rows = 420 values) is projected
    to 128 features; the 440 columns are then read as a sequence by a
    two-layer bidirectional LSTM, and the output at the last time step is
    returned.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Sequential(
            nn.Linear(420, 128),
            nn.BatchNorm1d(num_features=128),
            nn.ReLU(),
        )
        self.lstm = nn.LSTM(
            input_size=128,
            hidden_size=128,
            num_layers=2,
            batch_first=True,
            bidirectional=True,
        )

    def forward(self, x):
        # (N, 3, 140, 440) -> (N, 420, 440) -> (N, 440, 420): one row per column
        columns = x.reshape(-1, 420, 440).permute(0, 2, 1)

        # Flatten to (N * 440, 420) so the linear/batch-norm stack sees 2-D input
        projected = self.fc1(columns.reshape(-1, 420))

        # Back to a sequence: (N, 440, 128)
        sequence = projected.reshape(-1, 440, 128)

        # outputs: (N, 440, 256) — both directions concatenated
        outputs, _ = self.lstm(sequence)

        # Keep only the last time step: (N, 256)
        return outputs[:, -1, :]


class Decoder(nn.Module):
    """Decode the encoder vector into 7 character predictions of 65 classes.

    bidirectional: whether the decoder LSTM runs in both directions. The
        flag now configures the LSTM itself as well as the width of the
        output layer, so `Decoder(bidirectional=False)` is usable.
    """

    def __init__(self, bidirectional=True):
        super().__init__()
        bidirectional = bool(bidirectional)
        self.lstm = nn.LSTM(input_size=256,
                            hidden_size=128,
                            num_layers=2,
                            batch_first=True,
                            bidirectional=bidirectional)

        # Width of the LSTM output per time step: 128 per direction
        self.feature_size = 128 * 2 if bidirectional else 128
        # Fully connected layer mapping each time step to the 65 classes
        self.out = nn.Linear(self.feature_size, 65)

    def forward(self, x):
        # (N, 256) -> (N, 1, 256)
        x = x.reshape(-1, 1, 256)

        # Repeat the encoder vector for each of the 7 output positions:
        # (N, 7, 256)
        x = x.expand(-1, 7, 256)

        # (N, 7, feature_size)
        lstm, _ = self.lstm(x)

        # Merge batch and time so the linear layer sees 2-D input:
        # (N * 7, feature_size)
        y1 = lstm.reshape(-1, self.feature_size)

        # (N * 7, 65)
        out = self.out(y1)

        # (N, 7, 65): 7 characters per plate, 65 class scores each
        return out.reshape(-1, 7, 65)


class MainNet(nn.Module):
    """Full model: the Encoder followed by the Decoder."""

    def __init__(self):
        super().__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()

    def forward(self, x):
        # The encoder's output vector is the decoder's only input.
        return self.decoder(self.encoder(x))

四、开始训练网络模型

批大小(batch size)一般设为64,设置过大容易导致显存溢出。

if __name__ == '__main__':
    # Training entry point: resume from the checkpoint if one exists,
    # train on ./blue_plate and save the weights after every epoch.
    BATCH = 64
    EPOCH = 10000
    save_path = r'params/seq2seq.pth'

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = MainNet().to(device)

    opt = torch.optim.Adam(net.parameters())
    loss_func = nn.MSELoss()

    if os.path.exists(save_path):
        # map_location lets a checkpoint written on a GPU machine be
        # resumed on a CPU-only one (and vice versa).
        net.load_state_dict(torch.load(save_path, map_location=device))
    else:
        print("No Params!")

    # Create the checkpoint directory up front so that torch.save cannot
    # fail on a missing directory after a whole epoch of training.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    train_data = Sampling_train.Sampling(root="./blue_plate")
    valida_data = Sampling_train.Sampling(root="./blue_plate2")
    train_loader = data.DataLoader(dataset=train_data,
                                   batch_size=BATCH, shuffle=True)
    # NOTE(review): the validation loader is built but never used below.
    valida_loader = data.DataLoader(dataset=valida_data, batch_size=64, shuffle=True)

    for epoch in range(EPOCH):
        for i, (x, y) in enumerate(train_loader):
            # x: image batch; y: one-hot labels of shape (N, 7, 65)
            batch_x = x.to(device)
            batch_y = y.float().to(device)

            # output: (N, 7, 65)
            output = net(batch_x)

            loss = loss_func(output, batch_y)

            opt.zero_grad()
            loss.backward()
            opt.step()

            if i % 10 == 0:
                # Class index per character, for labels and predictions
                label_y = torch.argmax(y, 2).detach().numpy()
                out_y = torch.argmax(output, 2).cpu().detach().numpy()

                # Per-character accuracy over the characters actually in
                # this batch. The previous fixed divisor BATCH * 4 did not
                # match the 7 characters per plate or a short last batch
                # and could report more than 100%.
                accuracy = float(np.mean(out_y == label_y))
                print("epoch:{},i:{},loss:{:.4f},acc:{:.2f}%"
                      .format(epoch, i, loss.item(), accuracy * 100))

                # print("label_y:", LabeltoStr(label_y[0]))
                # print("out_y:", LabeltoStr(out_y[0]))

        torch.save(net.state_dict(), save_path)

五、测试效果

       这是刚开始训练不久的效果,可以看到只预测对后面两个字符,随着数据量的加大和训练时间的变长,精度会逐步提高,最后基本能完全预测正确。
在这里插入图片描述

Seq2Seq模型是一个常用的序列到序列模型,可以用于机器翻译、对话生成等任务。下面是一个简单的Seq2Seq模型代码实现,供参考:

首先,导入必要的库:

```python
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.models import Model
```

接着,定义模型的输入:

```python
# Encoder输入
encoder_inputs = Input(shape=(None, encoder_vocab_size))
# Decoder输入
decoder_inputs = Input(shape=(None, decoder_vocab_size))
```

其中,encoder_inputs是Encoder的输入,decoder_inputs是Decoder的输入。encoder_vocab_size和decoder_vocab_size分别是Encoder和Decoder的词汇表大小。

下面,定义Encoder和Decoder:

```python
# 定义Encoder
encoder_lstm = LSTM(latent_dim, return_state=True)
_, state_h, state_c = encoder_lstm(encoder_inputs)
encoder_states = [state_h, state_c]
# 定义Decoder
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
```

其中,latent_dim是隐藏层的维度。

最后,定义输出层并编译模型:

```python
# 定义输出层
decoder_dense = Dense(decoder_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
# 编译模型
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
```

这里的输出层采用了softmax激活函数,并且损失函数采用了交叉熵。

以上就是Seq2Seq模型的一个简单实现,具体实现方式根据任务需求可能会有所不同。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值