中文车牌数据集ccpd，标签处理

cv-daily

已于 2022-04-26 15:02:49 修改

阅读量2.3k

点赞数 1

文章标签： python 开发语言后端

于 2021-11-10 19:22:01 首次发布

本文链接：https://blog.csdn.net/weixin_41012399/article/details/121254797

版权

数据集介绍
CCPD数据集主要分为CCPD2019数据集和CCPD2020(CCPD-Green)数据集。CCPD2019数据集车牌类型仅有普通车牌(蓝色车牌)，CCPD2020数据集车牌类型仅有新能源车牌(绿色车牌)。
在CCPD数据集中，每张图片仅包含一张车牌，车牌省份主要为皖。CCPD中的每幅图像都包含大量的标注信息，但是CCPD数据集没有专门的标注文件，每张图像的文件名就是该图像对应的数据标注。标注最困难的部分是标注四个顶点的位置。为了完成这项任务，数据发布者首先在10k张图像上手动标记四个顶点的位置，然后设计了一个基于深度学习的检测模型，在对该网络进行良好训练后，对每幅图像的四个顶点位置进行自动标注。最后，数据发布者雇用了7名兼职工人在两周内纠正这些标注。CCPD提供了超过250k张独特的车牌图像和详细的标注。每张图像的分辨率为720(宽度)× 1160(高度)× 3(通道)。实际上，这种分辨率足以保证每张图像中的车牌清晰可辨，但是该数据集中有些图片的标注可能不准确。不过总的来说，CCPD数据集非常推荐给研究车牌识别算法的人员学习使用。
参考：https://blog.csdn.net/LuohenYJ/article/details/117752120
https://blog.csdn.net/qq_36516958/article/details/114274778
标签处理：
gettex.py

import os
# gettex.py: write the name of every image found in the sub-directories of
# the dataset root into one text file, one file name per line.
p="/home/zl/tao/datasets/CCPD2020/ccpd_green/"
filename="/home/zl/tao/datasets/CCPD2020/mytxtfile.txt"

# The scripts that consume this list open it as UTF-8, so write it as UTF-8
# explicitly; `with` guarantees the handle is closed even if listing fails.
with open(filename, 'w', encoding="utf-8") as file:
    # os.listdir returns entries in arbitrary order; sort for a stable list.
    for sub_dir in sorted(os.listdir(p)):
        data_dir = os.path.join(p, sub_dir)
        # Stray files directly under the root cannot be listed; skip them.
        if not os.path.isdir(data_dir):
            continue
        for img_name in sorted(os.listdir(data_dir)):
            # Write the name unmodified so it can be opened again later.
            file.write(img_name + '\n')
print("保存文件成功")

cropped.py

import cv2
import os
def new_label(old_label):
    """Decode an underscore-separated index string into plate text.

    The first index selects a character from ``provinces``; every following
    index selects a character from ``ads``.

    Args:
        old_label: Index string such as ``"0_0_3_24_25_26_27_28"``.

    Returns:
        The decoded characters concatenated into one string.

    Raises:
        ValueError: If a field is not a valid integer.
        IndexError: If an index is outside the corresponding table.
    """
    provinces = ["皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙",
                 "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂",
                 "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁", "新", "警", "学",
                 "O"]

    ads = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P',
           'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
           'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'O']

    province_index, *char_indices = old_label.split("_")
    return provinces[int(province_index)] + "".join(
        ads[int(index)] for index in char_indices
    )


# cropped.py main script: for every image named in the list file, crop the
# plate bounding box encoded in the file name, resize the crop to 240x80 and
# save it, then write the decoded plate text to a matching label file.
path="/home/zl/tao/datasets/CCPD2020/ccpd_green/all/"
labels_path="/home/zl/tao/datasets/CCPD2020/ccpd_green/labels/"
images_path = "/home/zl/tao/datasets/CCPD2020/ccpd_green/images/"

# Writing into a missing directory would fail, so create both up front.
os.makedirs(labels_path, exist_ok=True)
os.makedirs(images_path, exist_ok=True)

with open('/home/zl/tao/datasets/CCPD2020/mytxtfile.txt', encoding="utf-8") as f:
    txt = [line.strip() for line in f]

for i, name in enumerate(txt):
    if not name:
        continue
    path_new = os.path.join(path, name)
    img = cv2.imread(path_new)
    # cv2.imread returns None instead of raising when a file cannot be read.
    if img is None:
        print("skip unreadable image:", path_new)
        continue

    # The annotation lives in the file name: '-' separates the fields.
    iname = path_new.rsplit('/', 1)[-1].rsplit('.', 1)[0].split('-')
    try:
        # Third field from the end holds the plate character indices.
        plate_text = new_label(iname[-3])
        # Third field holds "x1&y1_x2&y2", the two bounding-box corners.
        [leftUp, rightDown] = [[int(eel) for eel in el.split('&')] for el in iname[2].split('_')]
    except (IndexError, ValueError):
        print("skip malformed file name:", name)
        continue

    cropped = img[leftUp[1]:rightDown[1], leftUp[0]:rightDown[0]]
    # cv2.resize rejects an empty source, e.g. from an inverted box.
    if cropped.size == 0:
        print("skip empty crop:", name)
        continue

    pic = cv2.resize(cropped, (240, 80), interpolation=cv2.INTER_CUBIC)
    # NOTE(review): outputs are named after the first file-name field only;
    # two source images sharing that field overwrite each other -- confirm
    # this is intended.
    imagename = images_path + str(iname[0]) + ".jpg"
    cv2.imencode('.jpg', pic)[1].tofile(imagename)

    label_txt = os.path.join(labels_path, iname[0]) + '.txt'
    # The label contains Chinese characters, so fix the encoding explicitly.
    with open(label_txt, 'w', encoding="utf-8") as file:
        file.write(plate_text)
    print(i)

最后得到labels文件夹和图片文件夹。

处理成车牌检测所需的 KITTI 格式：

import cv2
import os

# Plate-detection conversion: for every image named in the list file, save a
# re-encoded copy, write one KITTI-style label line holding the bounding box
# parsed from the file name, and save a preview with the box drawn on it.
path="/home/zl/tao/datasets/CCPD2020/ccpd_green/all/"
labels_path="/home/zl/tao/datasets/CCPD2020/ccpd_green/labels_lpd/"
images_path = "/home/zl/tao/datasets/CCPD2020/ccpd_green/images_lpd/"
outpath = "/home/zl/tao/datasets/CCPD2020/ccpd_green/show_lpd/"

# Writing into a missing directory would fail, so create all three up front.
for out_dir in (labels_path, images_path, outpath):
    os.makedirs(out_dir, exist_ok=True)

with open('/home/zl/tao/datasets/CCPD2020/mytxtfile.txt', encoding="utf-8") as f:
    txt = [line.strip() for line in f]

for i, name in enumerate(txt):
    if not name:
        continue
    path_new = os.path.join(path, name)
    img = cv2.imread(path_new)
    # cv2.imread returns None instead of raising when a file cannot be read.
    if img is None:
        print("skip unreadable image:", path_new)
        continue

    # The annotation lives in the file name: '-' separates the fields and the
    # third field holds "x1&y1_x2&y2", the two bounding-box corners.
    iname = path_new.rsplit('/', 1)[-1].rsplit('.', 1)[0].split('-')
    try:
        roi = iname[2].split('_')
        x1, y1 = (float(v) for v in roi[0].split('&')[:2])
        x2, y2 = (float(v) for v in roi[1].split('&')[:2])
    except (IndexError, ValueError):
        print("skip malformed file name:", name)
        continue

    # The list index is appended so that images sharing the same first
    # file-name field still get distinct output names.
    stem = str(iname[0] + str(i))

    imagename = images_path + stem + ".jpg"
    cv2.imencode('.jpg', img)[1].tofile(imagename)

    # One 15-field line per image; only the class name and the four box
    # coordinates carry information, every other field is written as zero.
    with open(os.path.join(labels_path, stem + ".txt"), "w") as label_file:
        label_file.write("car 0.0 0 0.0 {:.2f} {:.2f} {:.2f} {:.2f} 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n".format(x1, y1, x2, y2))

    # Preview image: draw the box and the class name for visual checking.
    img = cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), thickness=2)
    img = cv2.putText(img, 'car', (int(x1), int(y1) - 2), 0, 1, [225, 255, 255], thickness=1, lineType=cv2.LINE_AA)
    img_dst = outpath + stem + ".jpg"
    cv2.imwrite(img_dst, img)

    print(i)

cv-daily

关注

1
点赞
踩
15

收藏

觉得还不错? 一键收藏
0
评论
中文车牌数据集ccpd，标签处理

参考：https://blog.csdn.net/qq_36516958/article/details/114274778标签处理：gettex.pyimport osp="/home/zhanglu/tao/datasets/CCPD2020/ccpd_green/"dirs = os.listdir(p)filename="/home/zhanglu/tao/datasets/CCPD2020/mytxtfile.txt"file = open(filename,'w')for dir
复制链接

扫一扫