字符分割算法及代码

前段时间做了一个少数民族语言文字检测与识别的项目,共涉及 6 种语言,其中彝语的识别相对简单,部分彝语文档图片如下。由于印刷体版式工整,加之彝语的字形结构较为简单,遂采用“单字符分割 + 识别”的流程来完成彝语的识别工作(其中的单字符分割算法同样适用于汉字、韩语、日语等结构类似的文字)。(其实主要还是因为训练样本不够,外加甲方希望能得到单字符的坐标,不然直接用 CRNN 做序列识别可能会更简单一点,卒……)
在这里插入图片描述

上图为彝语打印文档的一张扫描件示例。由图可见,彝语的字形结构与汉字在一定程度上相似:二者都属于方块字,都存在上下结构、左右结构等。因此彝语的识别可以采用“单字符分割 + 识别”的方式来处理。下面是我对单字符分割这一部分的处理代码。(此代码经修改后同样可适用于汉字、韩语、日语等结构类似的文字的单字符分割。)

import cv2    # 版本为3.3.10,安装4.0及以上版本可能会不兼容
import os
import numpy as np


def display(img):
    """Show ``img`` in a window titled "1" and block until a key is pressed."""
    window_title = "1"
    cv2.imshow(window_title, img)
    cv2.waitKey(0)  # a delay of 0 waits for a key press with no timeout


def get_split_line(img, projection_row, min_height=15):
    """Locate text lines from a horizontal projection profile.

    A run opens at the first row whose projection is positive and closes at
    the first blank row (projection == 0) that lies at least ``min_height``
    rows below the run's start.  A blank row that comes earlier does not
    close the run, so narrow gaps and short fragments are absorbed into the
    line that follows them.

    Args:
        img: Unused; kept so existing callers keep working.
        projection_row: Per-row ink totals, indexable by row number (a plain
            sequence of numbers or the column vector returned by
            ``cv2.reduce``).
        min_height: Minimum height, in rows, for a run to be reported.

    Returns:
        A list of ``(start, end)`` row indices, ``end`` exclusive, suitable
        for slicing as ``img[start:end]``.
    """
    split_line_list = []
    last_index = len(projection_row) - 1
    start = None  # first row of the currently open run, or None
    for i, value in enumerate(projection_row):
        if start is None:
            if value > 0:
                start = i
            continue

        # An open run can only close on a blank row or on the final row.
        if not (value == 0 or i == last_index):
            continue

        # If ink reaches the final row, that row belongs to the run; the
        # exclusive end must therefore be one past it.
        end = i + 1 if value > 0 else i
        if end - start < min_height:
            continue  # too short so far: leave the run open
        split_line_list.append((start, end))
        start = None
    return split_line_list


def get_contours(img):
    """Return the bounding boxes of the external contours found in ``img``.

    Args:
        img: Single-channel binary image passed straight to
            ``cv2.findContours``.

    Returns:
        A list of ``(x, y, w, h)`` tuples, one per contour, in the order
        OpenCV reports them.
    """
    found = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
    # (contours, hierarchy); the contour list is second-from-last in both.
    contours = found[-2]
    return [tuple(cv2.boundingRect(contour)) for contour in contours]


def sort_merge(contour_row):
    """Sort boxes left to right and merge horizontally overlapping neighbours.

    Each box is compared with the box accumulated so far; when the
    normalised-overlap score is positive the two are replaced by their union
    and the union is compared with the next box.  The original author
    attributes the merge rule to a paper by Cheng-Lin Liu.

    Args:
        contour_row: Iterable of ``(x, y, w, h)`` boxes with non-zero width.
            It is not modified.

    Returns:
        A new list of ``(x, y, w, h)`` boxes sorted by ``x``.  Only real
        boxes are returned; no ``(-1, -1, -1, -1)`` padding entries are
        appended.
    """
    ordered = sorted(contour_row, key=lambda box: box[0])  # sort by x
    merged = []
    for rect_r in ordered:
        if not merged:
            merged.append(rect_r)
            continue

        rect_l = merged[-1]
        right_l = rect_l[0] + rect_l[2]
        right_r = rect_r[0] + rect_r[2]

        ovlp = right_l - rect_r[0]  # horizontal overlap, negative for a gap
        # NOTE(review): the left term is x - w/2, not the box centre
        # (x + w/2).  Kept as in the original so merge decisions do not
        # change; confirm against the referenced paper.
        dist = abs((rect_r[0] + rect_r[2] / 2) - (rect_l[0] - rect_l[2] / 2))
        span = max(right_l, right_r) - rect_l[0]
        nmovlp = (ovlp / rect_l[2] + ovlp / rect_r[2]) / 2 - dist / span / 8

        if nmovlp > 0:
            x = rect_l[0]
            y = min(rect_l[1], rect_r[1])
            w = max(right_l, right_r) - x
            h = max(rect_l[1] + rect_l[3], rect_r[1] + rect_r[3]) - y
            merged[-1] = (x, y, w, h)  # keep comparing the union with the next box
        else:
            merged.append(rect_r)
    return merged


def combine_verticalLine(contour_row):
    """Attach tall, thin boxes (height / width > 4) to a neighbouring box.

    Only boxes that have a neighbour on both sides are considered.  Such a
    box is joined to its left neighbour when the left gap is not larger than
    the right gap and the left neighbour is narrower than the right one;
    otherwise it is joined to its right neighbour.  Scanning stops at the
    first box whose x is -1, and zero-width boxes are skipped.

    Args:
        contour_row: List of ``(x, y, w, h)`` boxes; modified in place.

    Returns:
        The same list object, shortened by one entry per merge performed.
    """

    def _join(first, second):
        # Box from first's left edge to second's right edge, covering the
        # vertical extent of both.
        top = min(first[1], second[1])
        bottom = max(first[1] + first[3], second[1] + second[3])
        return (first[0], top, second[0] + second[2] - first[0], bottom - top)

    count = len(contour_row)  # stays constant: each removal is padded back
    merges = 0
    for idx in range(count):
        box = contour_row[idx]
        if box[0] == -1:  # reached the padding entries
            break
        if box[2] == 0:  # avoid dividing by a zero width
            continue

        is_vertical_stroke = box[3] * 1.0 / box[2] > 4
        has_both_neighbours = idx != 0 and idx != count - 1
        if not (is_vertical_stroke and has_both_neighbours):
            continue

        prev_box = contour_row[idx - 1]
        next_box = contour_row[idx + 1]
        gap_left = box[0] - prev_box[0] - prev_box[2]
        gap_right = next_box[0] - box[0] - box[2]

        if gap_left <= gap_right and prev_box[2] < next_box[2]:
            contour_row[idx - 1] = _join(prev_box, box)
            del contour_row[idx]
        else:
            contour_row[idx] = _join(box, next_box)
            del contour_row[idx + 1]
        contour_row.append((-1, -1, -1, -1))  # pad so indices stay valid
        merges += 1

    if merges:
        del contour_row[-merges:]  # drop the padding added above
    return contour_row


def split_oversizeWidth(contour_row):
    """Split every box whose width / height ratio exceeds 1.2 into two halves.

    The left half keeps the original x and gets ``int(w / 2)`` as width; the
    right half starts at ``int(x + w / 2 + 1)`` and runs to the original
    right edge.  The scan then moves on to the right half, so it may be
    split again; the left half is not re-examined.

    Args:
        contour_row: List of ``(x, y, w, h)`` boxes; modified in place.

    Returns:
        The same list object.
    """
    idx = 0
    while idx < len(contour_row):
        x, y, w, h = contour_row[idx]
        if w * 1.0 / h > 1.2:  # too wide for a single character
            right_x = int(x + w / 2 + 1)
            left_half = (x, y, int(w / 2), h)
            right_half = (right_x, y, x + w - right_x, h)
            contour_row[idx:idx + 1] = [left_half, right_half]
        idx += 1
    return contour_row


def image_preprocess(img_input):
    """Turn a BGR image into an eroded, inverted binary image.

    Steps: grayscale conversion, 3x3 Gaussian blur, Otsu binarisation with
    the result inverted, then one erosion with a 3x3 cross-shaped kernel.

    Args:
        img_input: Colour image accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The single-channel image produced by the final erosion.
    """
    gray = cv2.cvtColor(img_input, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (3, 3), 3)

    # Otsu picks the threshold; BINARY_INV maps pixels above it to 0 and the
    # rest to 255, which equals binarising first and inverting afterwards.
    inverted_otsu = cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    _, binary = cv2.threshold(blurred, 0, 255, inverted_otsu)

    cross_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    return cv2.erode(binary, cross_kernel)


def get_segmentation_result(img):  # has been eroded
    """Segment a preprocessed binary image into per-line character boxes.

    The image is projected onto the vertical axis to find text lines; for
    each line the contour boxes are merged, over-wide boxes are split, and
    thin vertical strokes are attached to a neighbour.

    Side effect: every resulting box is drawn in red on ``img_input``, a
    name this function does not receive as a parameter and therefore expects
    to find at module scope.

    Args:
        img: Binary image as produced by ``image_preprocess``.

    Returns:
        One list of ``(x, y, w, h)`` boxes per detected text line.  ``y`` is
        relative to the top of that line's crop; the line offset is applied
        only when drawing.
    """
    row_sums = cv2.reduce(img, 1, cv2.REDUCE_SUM, dtype=cv2.CV_32S)  # projection
    text_lines = get_split_line(img, row_sums)  # split image as row

    results = []
    for line in text_lines:
        top, bottom = line[0], line[1]
        boxes = get_contours(img[top:bottom, :])
        boxes = sort_merge(boxes)
        boxes = split_oversizeWidth(boxes)
        boxes = combine_verticalLine(boxes)
        results.append(boxes)

        for (x, y, w, h) in boxes:  # draw
            abs_y = y + top  # shift from crop coordinates to page coordinates
            cv2.rectangle(img_input, (x, abs_y), (x + w, abs_y + h), (0, 0, 255))
    return results


pic_path = './img003.jpg'

# Author's note: the array shape is (2975, 1787, 3), while an image viewer
# reports the same picture as 1787 * 2975.
img_input = cv2.imread(pic_path, 1)
if img_input is None:
    # cv2.imread reports an unreadable or missing file by returning None
    # rather than raising, so fail here with a clear message.
    raise OSError("cannot read image: " + pic_path)
img = image_preprocess(img_input)  # erode
segmentation_result = get_segmentation_result(img)  # store segmentation result : [(x,y,w,h),(),...]

# cv2.imwrite("./save.jpg", img_input)
display(img_input)

代码运行结果如下:
在这里插入图片描述
此代码经过一定的修改可适用于很多的场景,大家有需要可以尝试一下。

下面大致说一下代码的流程(重点关注 get_segmentation_result 部分)。字符分割主要采用“投影 + 连通域”的方式来实现:首先对图像进行二值化与腐蚀(用于断开印刷过程中可能存在的粘连);之后对预处理后的图片做水平投影,得到图片中所有的文本行;然后对每一个文本行分别进行如下操作:先求出其连通域的外接矩形,再对外接矩形按规则进行合并(合并规则是刘成林老师的一篇 paper 中的方法,代码中已经注释,具体操作步骤见下图),接着对合并后过宽(宽高比大于 1.2)的框进行切分,以修正合并错误的部分,最后再把尚未合并、高宽比很大(大于 4)的竖形笔画单独合并到相邻的框中。完成上述操作之后,就可以得到不错的分割效果。(具体流程和操作步骤仅供参考)
在这里插入图片描述
有问题欢迎多交流,共同进步~

  • 7
    点赞
  • 37
    收藏
    觉得还不错? 一键收藏
  • 9
    评论
Bernaola Galvan分割算法是用于分析DNA序列的一种算法,以下是一个简单的Python实现: ```python def bernoulli(seq): """ 计算一个序列的伯努利数列 """ return [1 if s == '1' else -1 for s in seq] def bg_segment(seq): """ Bernaola-Galvan分割算法实现 """ bseq = bernoulli(seq) N = len(bseq) q = [0] * N q[0] = bseq[0] for i in range(1, N): q[i] = q[i-1] + bseq[i] seg = [] for i in range(1, N): if q[i] * q[i-1] < 0: seg.append((i-1, i)) return seg ``` 这里的`seq`参数是DNA序列字符串,返回值是一个列表,其中每个元素是一个元组表示一个分割区间的起始和结束位置。使用方法示例如下: ```python seq = "ATGCTGATGGGGGCTAGCCGTACACAGTACTAGCCTGATCGTACGCTAGCGTACGATCGACGACGACGAGTCTAGCTGACGACTAGCTAGCTAGCTAGCTAGCTACGATGCTAGCTAGCTAGCTAGCTAGCTAGCTAGCTACG" segments = bg_segment(seq) for start, end in segments: print("Segment from", start, "to", end) ``` 输出结果示例如下: ``` Segment from 4 to 5 Segment from 9 to 10 Segment from 13 to 14 Segment from 14 to 15 Segment from 16 to 17 Segment from 20 to 21 Segment from 34 to 35 Segment from 46 to 47 Segment from 49 to 50 Segment from 52 to 53 Segment from 54 to 55 Segment from 62 to 63 Segment from 66 to 67 Segment from 73 to 74 Segment from 74 to 75 Segment from 76 to 77 Segment from 80 to 81 Segment from 92 to 93 Segment from 98 to 99 Segment from 104 to 105 Segment from 106 to 107 Segment from 110 to 111 Segment from 111 to 112 Segment from 113 to 114 Segment from 114 to 115 Segment from 115 to 116 Segment from 116 to 117 Segment from 118 to 119 Segment from 119 to 120 Segment from 124 to 125 Segment from 127 to 128 Segment from 130 to 131 Segment from 137 to 138 Segment from 139 to 140 Segment from 143 to 144 Segment from 145 to 146 Segment from 146 to 147 Segment from 147 to 148 Segment from 148 to 149 ``` 每个元素表示一个分割区间的起始和结束位置。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 9
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值