2021-10-11 OCR数据集整理

最新推荐文章于 2024-09-09 08:12:39 发布

齐名南

最新推荐文章于 2024-09-09 08:12:39 发布

阅读量835

点赞数

分类专栏： python OCR 文章标签： python

本文链接：https://blog.csdn.net/qq_51609636/article/details/120708468

版权

python 同时被 2 个专栏收录

15 篇文章 0 订阅

订阅专栏

OCR

6 篇文章 0 订阅

订阅专栏

1：标注数据集

2：截取数据集新json

3：利用labelme自带的labelme_json_to_dataset生成label，输出到json_dir目录

4：分离数据集的image和label

import os
import cv2
import numpy as np
#将json文件label转换为到data文件夹
# n=2#n为总共标注的图片数
# for i in range(n):
#     os.system('activate labelme & labelme_json_to_dataset F:/项目/线路标杆坠陀/data/txt/json/%d.json -o F:/项目/线路标杆坠陀/data/txt/data/%d_json'%(i,i))
#dst_w=512
#dst_h=512
#dst_shape=(dst_w,dst_h,3)
# Split every labelme "<name>_json" folder into an image file and a mask file.
train_image = './train_image/'
train_label = './train_label/'
# exist_ok avoids the separate exists() check and the race that comes with it.
os.makedirs(train_image, exist_ok=True)
os.makedirs(train_label, exist_ok=True)

dirlist = os.listdir('newjson/json_dir')
for entry in dirlist:
    print(entry)
    img = cv2.imread('./newjson/json_dir/%s/img.png' % entry)
    label = cv2.imread('./newjson/json_dir/%s/label.png' % entry)
    if img is None or label is None:
        # cv2.imread returns None instead of raising when a file is missing
        # or cannot be decoded.
        print('skip %s: img.png or label.png is missing or unreadable' % entry)
        continue
    print(img.shape)

    # Stretch channel 2 of the label image so that its maximum becomes 255.
    scaled = label[:, :, 2].astype(np.float64)
    peak = scaled.max()
    if peak > 0:
        scaled = scaled / peak * 255
    # else: nothing was annotated; keep the mask black instead of dividing by 0.
    mask = np.rint(scaled).astype(np.uint8)
    label = np.dstack([mask, mask, mask])  # same values in all three channels
    print(np.max(label[:, :, 2]))

    # Folder names are expected to look like "<name>_json".
    image_name = entry[:-5] if entry.endswith('_json') else entry
    print(image_name)
    cv2.imwrite(train_image + '%s.png' % image_name, img)
    cv2.imwrite(train_label + '%s.png' % image_name, label)

生成两个文件夹，一个放原图，一个放label二值化后的mask

5：根据mask的倾斜情况摆正数据集中的图片，防止文字倾斜，并保存摆正后的图片

# -*- coding:utf-8 -*-
# author: DuanshengLiu
import cv2
import numpy as np
from numpy.lib.npyio import savez_compressed


def locate_and_correct(img_src, img_mask, dst_size=(32, 160)):
    """Locate every masked region and warp it to an upright rectangle.

    Contours are extracted from channel 0 of ``img_mask``. For each contour
    that passes the size/brightness filter, the minimum-area rectangle is
    computed, the contour points that best match its four corners are picked,
    and the quadrilateral they span is perspective-warped out of ``img_src``.

    :param img_src: original image.
    :param img_mask: 3-dimensional mask array; target regions are expected to
        be bright on a dark background. NOTE: the four selected corner points
        are drawn onto this array in place.
    :param dst_size: ``(width, height)`` of every rectified crop. The default
        ``(32, 160)`` matches the previously hard-coded size.
    :return: ``([], [])`` when the mask contains no contour at all; otherwise
        ``(annotated_copy, crops)`` where ``annotated_copy`` is ``img_src``
        with the located quadrilaterals outlined in green and ``crops`` is the
        (possibly empty) list of rectified regions.
    """
    mask = img_mask[:, :, 0].astype(np.uint8)
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
    # (contours, hierarchy); the last two items are the same in all versions,
    # so no exception-based version probing is needed.
    contours, _hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]
    if not len(contours):  # nothing detected
        return [], []

    dst_w, dst_h = dst_size
    Lic_img = []
    img_src_copy = img_src.copy()  # used to draw the located outlines
    for cont in contours:
        x, y, w, h = cv2.boundingRect(cont)  # axis-aligned bounding box
        img_cut_mask = img_mask[y:y + h, x:x + w]  # mask patch of this contour
        # Contours may include tiny noise blobs; a real candidate has a high
        # mean mask value and is neither very narrow nor very short.
        if not (np.mean(img_cut_mask) >= 75 and w > 15 and h > 15):
            continue

        rect = cv2.minAreaRect(cont)  # rotated minimum-area rectangle
        box = cv2.boxPoints(rect).astype(np.int32)  # its four corner points
        cont = cont.reshape(-1, 2).tolist()
        # The source and destination points of the transform must correspond
        # one to one, so order the rectangle corners as
        # [top-left, bottom-left, top-right, bottom-right].
        box = sorted(box, key=lambda xy: xy[0])  # split into left / right by x
        box_left, box_right = box[:2], box[2:]
        box_left = sorted(box_left, key=lambda p: p[1])  # top before bottom
        box_right = sorted(box_right, key=lambda p: p[1])
        box = np.array(box_left + box_right)
        # These are the rectangle corners; the corners of the actual
        # (parallelogram or irregular) quadrilateral are searched for below.
        x0, y0 = box[0][0], box[0][1]
        x1, y1 = box[1][0], box[1][1]
        x2, y2 = box[2][0], box[2][1]
        x3, y3 = box[3][0], box[3][1]

        def point_to_line_distance(X, Y):
            """Distance from (X, Y) to the top edge and to the bottom edge."""
            if x2 - x0:
                k_up = (y2 - y0) / (x2 - x0)  # finite slope
                d_up = abs(k_up * X - Y + y2 - k_up * x2) / (k_up ** 2 + 1) ** 0.5
            else:  # vertical line
                d_up = abs(X - x2)
            if x1 - x3:
                k_down = (y1 - y3) / (x1 - x3)  # finite slope
                d_down = abs(k_down * X - Y + y1 - k_down * x1) / (k_down ** 2 + 1) ** 0.5
            else:  # vertical line
                d_down = abs(X - x1)
            return d_up, d_down

        d0, d1, d2, d3 = np.inf, np.inf, np.inf, np.inf
        l0, l1, l2, l3 = (x0, y0), (x1, y1), (x2, y2), (x3, y3)
        weight = 0.975
        # For every contour point combine its squared distance to a rectangle
        # corner with its distance to the top/bottom edge, and keep the point
        # with the smallest weighted cost for each of the four corners.
        for each in cont:
            x, y = each[0], each[1]
            dis0 = (x - x0) ** 2 + (y - y0) ** 2
            dis1 = (x - x1) ** 2 + (y - y1) ** 2
            dis2 = (x - x2) ** 2 + (y - y2) ** 2
            dis3 = (x - x3) ** 2 + (y - y3) ** 2
            d_up, d_down = point_to_line_distance(x, y)
            if weight * d_up + (1 - weight) * dis0 < d0:
                d0 = weight * d_up + (1 - weight) * dis0
                l0 = (x, y)
            if weight * d_down + (1 - weight) * dis1 < d1:
                d1 = weight * d_down + (1 - weight) * dis1
                l1 = (x, y)
            if weight * d_up + (1 - weight) * dis2 < d2:
                d2 = weight * d_up + (1 - weight) * dis2
                l2 = (x, y)
            if weight * d_down + (1 - weight) * dis3 < d3:
                d3 = weight * d_down + (1 - weight) * dis3
                l3 = (x, y)

        print([l0, l1, l2, l3])
        # Mark the chosen corners on the mask (modifies the caller's array).
        for l in [l0, l1, l2, l3]:
            cv2.circle(img=img_mask, color=(0, 255, 255), center=tuple(l), thickness=2, radius=2)
        # p0 and p1 list the corners in the same order:
        # top-left, bottom-left, top-right, bottom-right.
        p0 = np.float32([l0, l1, l2, l3])
        p1 = np.float32([(0, 0), (0, dst_h), (dst_w, 0), (dst_w, dst_h)])  # target rectangle
        transform_mat = cv2.getPerspectiveTransform(p0, p1)
        lic = cv2.warpPerspective(img_src, transform_mat, (dst_w, dst_h))
        Lic_img.append(lic)
        # drawContours needs int32 points; a plain np.array of Python ints is
        # int64 on most platforms and is rejected.
        outline = np.array([l0, l1, l3, l2], dtype=np.int32)
        cv2.drawContours(img_src_copy, [outline], -1, (0, 255, 0), 2)  # green outline
    return img_src_copy, Lic_img

# for i in range(1):
#     print(i
# )
import os
# Rectify every training image with its mask and store the first crop.
save_path = 'jiuzhenghoutup'
# ndarray.tofile raises if the target directory does not exist.
os.makedirs(save_path, exist_ok=True)
dirlist = os.listdir('./train_image')
for name in dirlist:
    img = cv2.imread('./train_image/%s' % name)
    # Read the mask with the default 3-channel mode: locate_and_correct
    # indexes img_mask[:, :, 0]. (cv2.COLOR_BGR2GRAY is a cvtColor code, not
    # an imread flag, and must not be passed here.)
    label = cv2.imread('./train_label/%s' % name)
    if img is None or label is None:
        print('skip %s: image or label is missing or unreadable' % name)
        continue
    srcimg, licimg = locate_and_correct(img, label)
    print(len(licimg))
    if not licimg:
        # No region passed the filter; there is nothing to save.
        print('skip %s: no region located' % name)
        continue
    print(licimg[0].shape)
    # imencode + tofile is kept from the original.
    # NOTE(review): the data is JPEG-encoded but stored under the source file
    # name, whose extension may differ (e.g. ".png") - confirm this is wanted.
    cv2.imencode('.jpg', licimg[0])[1].tofile(save_path + '/' + name)
# cv2.WIN

6：接下来是OCR部分，需要对摆正后的图片进行文字标注。标注工具的代码网上有，如有需要可联系我。

import sys, os
from PyQt5.QtWidgets import QMainWindow, QFileDialog, QGraphicsPixmapItem, QGraphicsScene, QApplication,QMessageBox
from PyQt5.QtGui import QPixmap
import LabelOCR, LogBrowser

from glob import glob
import time

img_formats = ['.bmp', '.jpg', '.jpeg', '.png']  # 支持的图片格式
label_formats = [".txt"]  # 支持的标注文件格式

# 标识符定义
LAST_PAGE = 0
NEXT_PAGE = 2
TO_PAGE = 1

# logger标识符
SAVE_LABEL = 0
JUMP_PAGE = 1
SHOW_PAGE = 2
LOAD_IMG = 3
LOAD_LABEL = 4


class MainCode(QMainWindow, LabelOCR.Ui_MainWindow):
    """Main window of the OCR text-labelling tool.

    Pages through the images of a chosen directory and lets the user edit a
    one-line text label per image. Labels are stored as
    ``<image name without extension>.txt`` in a separately chosen directory.
    """

    def __init__(self):
        QMainWindow.__init__(self)
        LabelOCR.Ui_MainWindow.__init__(self)
        self.setupUi(self)

        self.btn_load_img.clicked.connect(self.load_img)
        self.btn_load_text.clicked.connect(self.load_text)
        # go_to_page takes LAST_PAGE / TO_PAGE / NEXT_PAGE.
        self.btn_last_page.clicked.connect(lambda: self.go_to_page(LAST_PAGE))
        self.btn_next_page.clicked.connect(lambda: self.go_to_page(NEXT_PAGE))
        self.btn_go.clicked.connect(lambda: self.go_to_page(TO_PAGE))

        self.horizontalSlider.valueChanged.connect(self.show_img_rescled)
        self.btn_save.clicked.connect(self.save_file)
        self.btn_show_log.clicked.connect(self.show_history_log)

        self.log_window = LogWindow()

        self.init()

    def init(self):
        """Reset all browsing state and the initial zoom / window size."""
        self.img_dir = None
        self.img_list = None
        self.image_files = []  # image file paths of the current directory
        self.text_dir = None
        self.text_list = None
        self.text_files = []  # label file paths of the current directory

        self.img_now = None  # path of the image currently shown
        self.text_now = None  # label text as last loaded/saved, None if no file
        self.text_now_path = None
        self.page_now = 0  # zero-based index into image_files
        self.img_num_total = 0

        self.pixmap = None

        self.page_init()
        self.screen_size_init()

    def page_init(self):
        """Put the zoom slider at 1x."""
        self.resize_scale = 10  # 10 means 1x, 50 means 5x
        self.horizontalSlider.setSliderPosition(self.resize_scale)

    def screen_size_init(self):
        """Give the window its initial size."""
        self.resize(800, 600)

    def load_img(self):
        """Ask for an image directory and show its first supported image."""
        img_dir = QFileDialog.getExistingDirectory(self, "选取文件夹")
        if len(img_dir) == 0 or img_dir == self.img_dir:
            return
        self.img_dir = img_dir

        candidates = sorted(glob(img_dir + os.sep + "*.*"))
        image_files = [x.replace('/', os.sep) for x in candidates
                       if os.path.splitext(x)[-1].lower() in img_formats]
        if len(image_files) == 0:
            QMessageBox.question(self, 'Ooooops', '目录里没有支持的图片。', QMessageBox.Yes)
            return
        self.image_files = image_files
        self.img_num_total = len(self.image_files)

        self.logger(self.img_dir, LOAD_IMG)
        # Page numbers in the spin box are 1-based (go_to_page subtracts 1),
        # so a freshly opened directory starts at page 1. The maximum is set
        # first so the value is valid for the new directory.
        self.spinBox_page.setMaximum(self.img_num_total)
        self.spinBox_page.setValue(1)
        self.go_to_page(TO_PAGE)
        self.label_img_dir.setText(self.img_dir)
        self.label_num_total.setText("当前: {}/{}".format(self.page_now + 1, self.img_num_total))

    def show_img(self, path):
        """Display the image at ``path``; return True on success."""
        try:
            pixmap = QPixmap(path)
            # QPixmap does not raise for an unreadable file; it yields a null
            # pixmap, which has to be checked explicitly.
            loaded = not pixmap.isNull()
            if loaded:
                self.pixmap = pixmap
                self.item = QGraphicsPixmapItem(self.pixmap)
                self.scene = QGraphicsScene()
                self.scene.addItem(self.item)
                self.graphicsView.setScene(self.scene)
                self.item.setScale(self.horizontalSlider.value() / 10)
                self.img_now = path
        except Exception:
            loaded = False

        if not loaded:
            QMessageBox.question(self, 'Ooooops', '目录里没有支持的图片。', QMessageBox.Yes)
        return loaded

    def show_img_rescled(self):
        """Re-apply the slider's zoom factor to the image being shown."""
        if self.img_now is None:
            return
        self.item.setScale(self.horizontalSlider.value() / 10)
        self.scene = QGraphicsScene()
        self.scene.addItem(self.item)
        self.graphicsView.setScene(self.scene)

    def load_text(self):
        """Ask for the label directory and reload the current page."""
        text_dir = QFileDialog.getExistingDirectory(self, "选取文件夹")
        if len(text_dir) == 0 or text_dir == self.text_dir:
            return
        self.text_dir = text_dir

        candidates = sorted(glob(text_dir + os.sep + "*.*"))
        self.text_files = [x.replace('/', os.sep) for x in candidates
                           if os.path.splitext(x)[-1].lower() in label_formats]
        self.label_text_dir.setText(self.text_dir)
        self.logger(self.text_dir, LOAD_LABEL)
        self.go_to_page(TO_PAGE)

    def show_text(self, path):
        """Load the first line of the label file at ``path`` into the editor."""
        if os.path.exists(path):
            with open(path, "r", encoding="utf-8") as f:
                # Drop the line terminator; otherwise the stored text never
                # equals the stripped editor text and every save looks changed.
                self.text_now = f.readline().rstrip("\r\n")
            self.label_editor.setText(self.text_now)
        else:
            self.statusbar.showMessage("找不到相应label")
            self.label_editor.setText("")
            self.text_now = None

    def _label_path_for(self, img_path):
        """Return the label file path that belongs to ``img_path``."""
        stem = os.path.splitext(os.path.basename(img_path))[0]
        return os.path.join(self.text_dir, stem + label_formats[0])

    def go_to_page(self, state):
        """Move to the previous / next / spin-box page and display it.

        ``state`` is one of LAST_PAGE, NEXT_PAGE, TO_PAGE. Moving with
        LAST_PAGE or NEXT_PAGE saves the label of the page being left.
        """
        if self.img_dir is None:
            return

        if state == LAST_PAGE:
            if self.page_now <= 0:
                return
            self.page_now -= 1
        elif state == NEXT_PAGE:
            if self.page_now + 1 >= self.img_num_total:
                return
            self.page_now += 1
        elif state == TO_PAGE:
            target = self.spinBox_page.value() - 1
            if not 0 <= target < self.img_num_total:
                QMessageBox.question(self, 'Ooooops', '跳转页码超范围啦。', QMessageBox.Yes)
                return
            self.page_now = target

        if state == LAST_PAGE or state == NEXT_PAGE:
            # img_now / text_now_path still describe the page being left.
            self.save_file()

        img_path = self.image_files[self.page_now]

        if self.text_dir is not None:
            self.text_now_path = self._label_path_for(img_path)
            self.show_text(self.text_now_path)

        if self.show_img(img_path):
            page_info = "当前: {}/{}".format(self.page_now + 1, self.img_num_total)
            self.label_file_now.setText(img_path)
            self.label_num_total.setText(page_info)
            self.spinBox_page.setValue(self.page_now + 1)
            self.logger(self.text_now_path, JUMP_PAGE, page_info)

    def save_file(self):
        """Write the editor text to the current label file if it changed."""
        text = self.label_editor.text().strip()
        if self.text_now is None:
            # No label file exists yet for this image.
            if text == "":
                return
            if self.text_dir is None or self.img_now is None:
                # Without both directories there is no path to write to.
                self.statusbar.showMessage("请先载入图片目录和标签目录")
                return
            self.text_now_path = self._label_path_for(self.img_now)
        elif text == self.text_now:
            self.statusbar.showMessage("没有改变标注哦")
            return

        self.text_now = text
        with open(self.text_now_path, "w", encoding="utf-8") as f:
            f.write(text)
        self.logger(self.text_now_path, SAVE_LABEL)

    def logger(self, file_name, behavior, memo=""):
        """Append a timestamped entry to the log window and the status bar."""
        timestamp = time.strftime("%a %b %d %H:%M:%S %Y", time.localtime())

        if behavior == SAVE_LABEL:
            content = "保存文件 " + file_name
        elif behavior == JUMP_PAGE:
            content = "浏览 " + memo
        elif behavior == LOAD_IMG:
            content = "载入图片目录 " + file_name
        elif behavior == LOAD_LABEL:
            content = "载入标签目录 " + file_name
        else:
            content = ""

        log = " ".join([timestamp, content])
        self.log_window.textBrowser.append(log)
        self.statusbar.showMessage(log)

    def show_history_log(self):
        """Open the log window."""
        self.log_window.show()

    def keyPressEvent(self, event):
        """Keyboard shortcuts: A previous, D next, S save, H show log."""
        key = event.key()
        if key == 65:  # A
            self.go_to_page(LAST_PAGE)
        elif key == 68:  # D
            self.go_to_page(NEXT_PAGE)
        elif key == 83:  # S
            self.save_file()
        elif key == 72:  # H
            self.show_history_log()

    def closeEvent(self, event):
        """Close the log window together with the main window."""
        self.log_window.close()


class LogWindow(QMainWindow, LogBrowser.Ui_MainWindow):
    """Secondary window that hosts the history log widgets of LogBrowser."""

    # _signal = QtCore.pyqtSignal(str)
    def __init__(self):
        QMainWindow.__init__(self)
        # Initialise the UI base this class actually inherits from
        # (LogBrowser, not LabelOCR).
        LogBrowser.Ui_MainWindow.__init__(self)
        self.setupUi(self)
        self.logger = None


if __name__ == '__main__':
    # Build the application object first, then the two windows.
    app = QApplication(sys.argv)
    main_window = MainCode()
    log_window = LogWindow()

    # Apply the stylesheet stored in qss/blue.txt to the whole application.
    with open('qss/blue.txt') as qss_file:
        stylesheet = qss_file.read().strip('\n')
    app.setStyleSheet(stylesheet)

    main_window.show()
    exit_code = app.exec_()
    sys.exit(exit_code)

7：标注完成后需要将标注文字与字典建立联系（字典为char_std_5990.txt），并将竖排文字的图片旋转为横排

建立联系需要用到以下工具：

# -*- coding: utf-8 -*-
import os
import numpy as np
import cv2
def gen_labetxt(path,dist_txt,org_labelpath,out_file):
    """Append one ``<image> <idx> <idx> ...`` line per image to ``out_file``.

    For every file name in ``path`` the label ``<name without extension>.txt``
    is read from ``org_labelpath`` and each of its characters is replaced by
    the zero-based number of the first line of ``dist_txt`` that contains it.
    Characters that occur in no dictionary line are skipped.

    :param path: directory whose entries name the images.
    :param dist_txt: dictionary text file (UTF-8).
    :param org_labelpath: directory holding the per-image ``.txt`` labels.
    :param out_file: output text file; opened in append mode.
    :raises FileNotFoundError: if the label file of an image does not exist.
    """
    # The dictionary is identical for every image, so index it once instead of
    # re-reading and re-scanning it per character.
    char_index = {}
    with open(dist_txt, encoding='utf-8') as dict_file:
        for line_no, line in enumerate(dict_file):
            # Line terminators are not dictionary characters; leaving them in
            # would map a trailing newline of a label to the first line.
            for ch in line.rstrip('\r\n'):
                char_index.setdefault(ch, line_no)  # first occurrence wins

    with open(out_file, 'a', encoding='utf-8') as out:
        for image in os.listdir(path):
            # splitext keeps dots inside the name ("a.b.png" -> "a.b"), which
            # matches how the labelling tool names its .txt files.
            file_name = os.path.splitext(image)[0]
            print(file_name)
            labe_path = org_labelpath + '/' + file_name + '.txt'
            with open(labe_path, encoding='UTF-8') as label_file:
                content = label_file.read()
            dic_list = [char_index[ch] for ch in content if ch in char_index]
            out.write(image + ' ' + ' '.join(map(str, dic_list)) + '\n')

def rot90_image(path,saveimagepath):
    """Rotate every image in ``path`` with ``np.rot90`` and save the result.

    :param path: directory containing the source images.
    :param saveimagepath: directory for the rotated copies (same file names);
        created if it does not exist.
    """
    # cv2.imwrite only returns False when the directory is missing, so make
    # sure it exists instead of silently writing nothing.
    os.makedirs(saveimagepath, exist_ok=True)
    for image in os.listdir(path):
        imagedata = cv2.imread(path + '/' + image)
        if imagedata is None:
            # cv2.imread returns None for files it cannot decode.
            print('skip unreadable image: ' + image)
            continue
        # np.rot90 returns a view; hand OpenCV a contiguous array.
        img = np.ascontiguousarray(np.rot90(imagedata))
        cv2.imwrite(saveimagepath + '/' + image, img)
        print(image)




save_path = '3.txt'
# gen_labetxt appends to the output file, so empty it first. The handle is
# closed right away instead of staying open while the same file is appended to.
with open(save_path, 'w', encoding='utf-8'):
    pass
path = 'txtimage'
labelpath = 'txtlabe'
dist_txt = 'char_std_5990.txt'
gen_labetxt(path, dist_txt, labelpath, save_path)
saveimagepath = 'newimage'
rot90_image(path, saveimagepath)