字符分割算法及代码

最新推荐文章于 2023-08-01 15:34:27 发布

aift

最新推荐文章于 2023-08-01 15:34:27 发布

阅读量5k

点赞数 7

分类专栏：光学字符识别（OCR）

本文链接：https://blog.csdn.net/ft_sunshine/article/details/89914447

版权

光学字符识别（OCR）同时被 2 个专栏收录

8 篇文章 4 订阅

订阅专栏

数字图像处理

1 篇文章 0 订阅

订阅专栏

之前一段时间做了一个少数民族语言的文字检测与识别的项目，一共有6种语言，其中的彝语的识别相对简单，部分彝语文档图片如下。由于印刷体版式工整，加之彝语的结构较为简单，遂采取了单字符分割+识别（此算法同样可适用于汉字、韩语、日语等类似结构文字的单字符分割部分）的流程来进行彝语的识别工作。（其实主要还是因为训练样本不够啊，外加甲方希望能得到单字符的坐标，不然直接用CRNN来做序列识别可能会更简单一点，卒。。。）
在这里插入图片描述

上图所示为彝语打印文档扫描件的一张示例图。由图可见，彝语的结构与汉字在某种程度上有相似之处，比如都属于方块字，存在上下结构和左右结构等。因此，彝语的识别可以采用“单字符分割+识别”的方式来处理。下面是我对单字符分割这一部分的处理代码。（此代码经修改后同样适用于汉字、韩语、日语等结构类似的文字的单字符分割）

import cv2    # 版本为3.3.10，安装4.0及以上版本可能会不兼容
import os
import numpy as np


def display(img):
    """Show ``img`` in a window titled "1" and block until a key is pressed."""
    window_title = "1"
    cv2.imshow(window_title, img)
    cv2.waitKey(0)  # a delay of 0 means "wait for a key press"


def get_split_line(img, projection_row, min_height=15):
    """Split a row-projection profile into (start, end) bands of text rows.

    A band opens at the first index whose projection is positive and closes
    at the next index whose projection is zero, or at the end of the
    profile. A band shorter than ``min_height`` is not emitted; instead it
    stays open and keeps growing across the following gap, so a thin
    fragment ends up attached to whatever comes below it.

    Args:
        img: Unused; kept so existing callers keep working.
        projection_row: Sequence with one projection value per image row.
        min_height: Minimum ``end - start`` for a band to be emitted.

    Returns:
        List of ``(start, end)`` tuples with ``end`` exclusive, suitable
        for slicing as ``img[start:end]``.
    """
    split_line_list = []
    in_band = False
    start = 0
    last = len(projection_row) - 1
    for i, value in enumerate(projection_row):
        if not in_band:
            if not value > 0:
                continue
            in_band = True
            start = i
            if i != last:
                continue
            # The band opens on the very last row: fall through and close it.
        if value == 0:
            end = i
        elif i == last:
            # The last row itself belongs to the band, so the exclusive
            # end is one past it.
            end = i + 1
        else:
            continue
        if end - start >= min_height:
            split_line_list.append((start, end))
            in_band = False
        # Otherwise the band is too short: leave it open so it can absorb
        # the rows that follow.
    return split_line_list


def get_contours(img):
    """Return the bounding box of every external contour found in ``img``.

    Args:
        img: Single-channel binary image passed straight to
            ``cv2.findContours``.

    Returns:
        List of ``(x, y, w, h)`` tuples, one per contour, in the order
        OpenCV reports them.
    """
    found = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
    # (contours, hierarchy); the contour list is second-to-last in both.
    contours = found[-2]
    return [tuple(cv2.boundingRect(contour)) for contour in contours]


def sort_merge(contour_row):
    """Sort boxes left-to-right and merge neighbours that overlap horizontally.

    Each box is compared with the box immediately to its right. When the
    normalized overlap score is positive the two are replaced by their
    union, and the union is compared again with the next box. The scoring
    rule follows the original author's note that it comes from a paper by
    Cheng-Lin Liu.

    Args:
        contour_row: Iterable of ``(x, y, w, h)`` boxes with positive widths.

    Returns:
        A new list of ``(x, y, w, h)`` boxes sorted by ``x``. The input is
        not modified and the result contains no placeholder entries.
    """
    merged = sorted(contour_row, key=lambda rect: rect[0])  # sort by x
    i = 0
    while i < len(merged) - 1:
        rectL = merged[i]
        rectR = merged[i + 1]
        right_L = rectL[0] + rectL[2]
        right_R = rectR[0] + rectR[2]
        far_right = max(right_L, right_R)

        ovlp = right_L - rectR[0]
        # NOTE(review): the left term subtracts half the width, so this is
        # not the distance between the two centres. Kept exactly as in the
        # original because the thresholds were tuned with it; confirm
        # against the paper before changing.
        dist = abs((rectR[0] + rectR[2] / 2) - (rectL[0] - rectL[2] / 2))
        span = far_right - rectL[0]
        nmovlp = (ovlp / rectL[2] + ovlp / rectR[2]) / 2 - dist / span / 8

        if nmovlp > 0:
            x = rectL[0]
            y = min(rectL[1], rectR[1])
            w = far_right - x
            h = max(rectL[1] + rectL[3], rectR[1] + rectR[3]) - y
            merged[i] = (x, y, w, h)
            # Stay on index i so the union is tested against the next box.
            del merged[i + 1]
        else:
            i += 1
    return merged


def combine_verticalLine(contour_row):
    """Attach tall, narrow boxes to one of their horizontal neighbours.

    A box counts as a vertical stroke when ``height / width > 4``. Such a
    box is merged only if it has a neighbour on both sides, so the first
    and the last box of the row are never merged. It joins the left
    neighbour when the left gap is not larger than the right gap and the
    left neighbour is narrower than the right one; otherwise it joins the
    right neighbour.

    Args:
        contour_row: List of ``(x, y, w, h)`` boxes ordered left to right.
            Trailing ``(-1, -1, -1, -1)`` placeholder entries are
            discarded.

    Returns:
        The same list object, modified in place, containing only real
        boxes.
    """
    # Drop placeholder padding so it can never be mistaken for a neighbour.
    for idx, rect in enumerate(contour_row):
        if rect[0] == -1:
            del contour_row[idx:]
            break

    i = 0
    while i < len(contour_row):
        rect = contour_row[i]
        is_stroke = rect[2] != 0 and rect[3] * 1.0 / rect[2] > 4
        if is_stroke and 0 < i < len(contour_row) - 1:
            rect_left = contour_row[i - 1]
            rect_right = contour_row[i + 1]
            left_dis = rect[0] - rect_left[0] - rect_left[2]
            right_dis = rect_right[0] - rect[0] - rect[2]
            bottom = rect[1] + rect[3]
            if left_dis <= right_dis and rect_left[2] < rect_right[2]:
                x = rect_left[0]
                y = min(rect_left[1], rect[1])
                w = rect[0] + rect[2] - rect_left[0]
                h = max(rect_left[1] + rect_left[3], bottom) - y
                contour_row[i - 1] = (x, y, w, h)
                # As in the original, the box that slides into slot i is
                # skipped by the increment below and is not re-examined.
                contour_row.pop(i)
            else:
                x = rect[0]
                y = min(rect[1], rect_right[1])
                w = rect_right[0] + rect_right[2] - rect[0]
                h = max(rect_right[1] + rect_right[3], bottom) - y
                contour_row[i] = (x, y, w, h)
                contour_row.pop(i + 1)
        i += 1
    return contour_row


def split_oversizeWidth(contour_row, max_ratio=1.2):
    """Split boxes that are too wide for their height into two halves.

    A box is split when ``width / height > max_ratio``. The left half keeps
    the original ``x`` with width ``int(w / 2)``; the right half starts at
    ``int(x + w / 2 + 1)`` and runs to the original right edge. The right
    half is examined in turn, so a very wide box can be split repeatedly.

    Args:
        contour_row: List of ``(x, y, w, h)`` boxes.
        max_ratio: Largest width-to-height ratio left unsplit.

    Returns:
        The same list object, modified in place.
    """
    i = 0
    while i < len(contour_row):
        x, y, w, h = contour_row[i]
        # Boxes with non-positive height (degenerate or placeholder
        # entries) are left untouched; this also avoids dividing by zero.
        if h > 0 and w * 1.0 / h > max_ratio:
            x_new = int(x + w / 2 + 1)
            contour_row[i] = (x, y, int(w / 2), h)
            contour_row.insert(i + 1, (x_new, y, x + w - x_new, h))
        i += 1
    return contour_row


def image_preprocess(img_input):
    """Turn a BGR image into an eroded, inverted binary image.

    Steps: convert to grayscale, apply a 3x3 Gaussian blur, binarize with
    Otsu's threshold and invert in the same pass (pixels at or below the
    threshold become 255, the rest 0), then erode with a 3x3 cross kernel.

    Args:
        img_input: 3-channel BGR image.

    Returns:
        Single-channel image whose values are 0 or 255.
    """
    blurred = cv2.GaussianBlur(
        cv2.cvtColor(img_input, cv2.COLOR_BGR2GRAY), (3, 3), 3
    )
    # Otsu picks the threshold; BINARY_INV flips the result in one call.
    _, binary = cv2.threshold(
        blurred, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    )
    cross_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    return cv2.erode(binary, cross_kernel)


def get_segmentation_result(img, canvas=None):  # has been eroded
    """Segment a preprocessed page into per-line lists of character boxes.

    The image is projected onto the vertical axis to find text-line bands.
    Within each band the contours are boxed, merged, split and re-combined
    by ``sort_merge``, ``split_oversizeWidth`` and ``combine_verticalLine``.
    Every resulting box is also outlined in red on ``canvas``.

    Args:
        img: Binary image as produced by ``image_preprocess``.
        canvas: Image to draw the boxes on. When omitted, the module-level
            ``img_input`` is used if it exists; if it does not, nothing is
            drawn.

    Returns:
        A list with one entry per text line; each entry is a list of
        ``(x, y, w, h)`` boxes. ``y`` is relative to the top of that
        line's band, not to the page; only the drawing adds the band
        offset.
    """
    if canvas is None:
        # Keep the original behaviour of drawing on the script's global.
        canvas = globals().get("img_input")

    projection_row = cv2.reduce(img, 1, cv2.REDUCE_SUM, dtype=cv2.CV_32S)  # projection
    segmentation_result = []
    for top, bottom in get_split_line(img, projection_row):  # split image as row
        contour_row = get_contours(img[top:bottom, :])
        contour_row = sort_merge(contour_row)
        contour_row = split_oversizeWidth(contour_row)
        contour_row = combine_verticalLine(contour_row)
        segmentation_result.append(contour_row)
        if canvas is None:
            continue
        for (x, y, w, h) in contour_row:  # draw
            page_y = y + top
            cv2.rectangle(canvas, (x, page_y), (x + w, page_y + h), (0, 0, 255))
    return segmentation_result


pic_path = './img003.jpg'

# Load as a 3-channel colour image. For the sample page the array shape is
# (2975, 1787, 3), i.e. (rows, cols, channels); an image viewer reports the
# same picture as 1787 * 2975 (width * height).
img_input = cv2.imread(pic_path, 1)
if img_input is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("cannot read image: " + pic_path)
img = image_preprocess(img_input)  # erode
segmentation_result = get_segmentation_result(img)  # store segmentation result : [(x,y,w,h),(),...]

# cv2.imwrite("./save.jpg", img_input)
display(img_input)

代码运行结果如下：
在这里插入图片描述
此代码经过一定的修改可适用于很多的场景，大家有需要可以尝试一下。

下面大致说一下代码的流程（重点关注 get_segmentation_result 部分）。字符分割主要采用“投影+连通域”的方式来实现：首先对图像进行二值化与腐蚀（用于断开印刷过程中可能存在的粘连）；之后对预处理后的图片做水平投影，得到图片中所有的文本行；然后对每一个文本行依次进行如下操作：提取连通域，对连通域进行合并（合并规则采用刘成林老师一篇 paper 中的方法，代码中已有注释，具体操作步骤见下图），对合并后过宽的部分（即合并错误的部分）进行切分，最后对尚未合并的、高宽比很大的竖形部件单独进行合并。完成上述操作之后，就可以得到不错的分割效果。（具体流程和操作步骤仅供参考）
在这里插入图片描述
有问题欢迎多交流，共同进步~

aift

关注

7
点赞
踩
37

收藏

觉得还不错? 一键收藏
9
评论
字符分割算法及代码

之前一段时间做了一个少数民族语言的文字检测与识别的项目，一共有6种语言，其中的彝语的识别相对简单，部分彝语文档图片如下。由于印刷体版式工整，加之彝语的结构较为简单，遂采取了单字符分割+识别的流程来进行彝语的识别工作。（其实主要还是因为训练样本不够啊，外加“某局”希望能得到单字符的坐标，不然直接用CRNN来做序列识别可能会更简单一点，卒。。。）上图所示为彝语打印文档的一张扫描件示例图。由图可见彝...
复制链接

扫一扫