To recognize text in an image, the first step is to segment the characters. I wrote a connected-component segmentation algorithm earlier, so here I reuse it to split the characters apart. Suppose we have the following image:
First, segment the image into connected components:
import numpy as np
import cv2
from matplotlib import pyplot as plt


class ImgSplit():
    def __init__(self, path):
        self.img0 = cv2.imread(path, cv2.IMREAD_GRAYSCALE)  # read as a grayscale image
        img = self.preProcess(self.img0, 120)
        self.domains = self.connectedDomain(img)

    def preProcess(self, img0, thresh=50):
        img = img0.copy()
        # binarize and invert: dark (text) pixels become 1, background becomes 0
        img[np.where(img < thresh)] = 1
        img[np.where(img >= thresh)] = 0
        return img

    def connectedDomain(self, img):
        h, w = img.shape
        # 4-connected components
        domain = dict()
        label = -1  # used as the connected-component ID
        # i == 0: the first row
        flag = True  # was the previous pixel 0?
        for j in range(w):
            if img[0, j] == 1:
                if flag:  # previous pixel was 0, so start a new component
                    label += 1
                    domain[label] = set()
                domain[label].add((0, j))
                flag = False
            else:
                flag = True
        for i in range(1, h):  # all rows after the first
            flag = True
            for j in range(w):
                if img[i, j] == 1:
                    for key in domain:
                        if (i - 1, j) in domain[key]:  # does the pixel above belong to an existing component?
                            domain[key].add((i, j))  # record the pixel coordinate
                            if not flag:  # the pixel to the left is also 1
                                if last_label != key:
                                    domain[key] = domain[key].union(domain[last_label])  # merge the two components
                                    del domain[last_label]
                            last_label = key
                            break
                    else:  # not connected to any component in the row above
                        if flag:
                            label += 1
                            domain[label] = set()
                            domain[label].add((i, j))
                            last_label = label
                        else:
                            domain[last_label].add((i, j))
                    flag = False
                else:
                    flag = True
        return domain.values()


# imgSplit = ImgSplit("test.PNG")
imgSplit = ImgSplit("3.PNG")
domains = imgSplit.domains
# sort by area, descending
# descended = sorted(domains, key=len, reverse=True)
# sort top to bottom (by the mean row index of each component's pixels)
# up2down = sorted(domains, key=lambda g: sum([coord[0] for coord in g]) / float(len(g)))
# sort left to right (by the mean column index of each component's pixels)
left2right = sorted(domains, key=lambda g: sum([coord[1] for coord in g]) / float(len(g)))
for i, domain in enumerate(left2right):
    z = np.zeros(imgSplit.img0.shape)
    for x, y in domain:
        z[x, y] = 1
    plt.subplot(10, 12, i + 1)
    plt.imshow(z)
plt.show()
The result contains far too many fragments, well over a hundred.
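A quick count confirms this (a minimal check, reusing the left2right list built above):

print(len(left2right))  # number of fragments before merging; over 100 for this image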
Next we merge fragments that are horizontally close to each other (this assumes the characters are laid out left to right):
yMeans = []
yMins = []
yMaxs = []
for domain in left2right:
    Y = [item[1] for item in domain]
    yMins.append(min(Y))
    yMaxs.append(max(Y))
    yMeans.append(sum(Y) / float(len(Y)))

# merge components that were split vertically but belong to the same character
for i in range(len(yMeans) - 1, 0, -1):
    buffer = 0.21  # buffer rate: widen each column range by 21% on both sides
    if yMins[i - 1] - buffer * (yMaxs[i - 1] - yMins[i - 1]) < yMeans[i] < yMaxs[i - 1] + buffer * (yMaxs[i - 1] - yMins[i - 1]) \
            or yMins[i] - buffer * (yMaxs[i] - yMins[i]) < yMeans[i - 1] < yMaxs[i] + buffer * (yMaxs[i] - yMins[i]):
        left2right[i - 1] = left2right[i - 1].union(left2right[i])
        yMaxs[i - 1] = max(yMaxs[i], yMaxs[i - 1])
        yMins[i - 1] = min(yMins[i], yMins[i - 1])
        del left2right[i]
        del yMeans[i]
        del yMins[i]
        del yMaxs[i]

for i, domain in enumerate(left2right):
    z = np.zeros(imgSplit.img0.shape)
    for x, y in domain:
        z[x, y] = 1
    plt.subplot(5, 5, i + 1)
    plt.imshow(z)
plt.show()
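The condition in the merge loop is a buffered interval-overlap test on the column ranges of two neighbouring fragments: they are merged when the mean column of one falls inside the (slightly widened) column range of the other. As a standalone sketch with a hypothetical name, the same criterion looks like this:

def column_overlap(y_min, y_max, other_mean, buffer=0.21):
    # hypothetical helper: does other_mean fall inside [y_min, y_max]
    # after widening the range by buffer * (y_max - y_min) on each side?
    margin = buffer * (y_max - y_min)
    return y_min - margin < other_mean < y_max + margin

With this helper, the condition above reads as column_overlap(yMins[i-1], yMaxs[i-1], yMeans[i]) or column_overlap(yMins[i], yMaxs[i], yMeans[i-1]).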
We are left with 16 character blocks on a black background:
Finally, crop out the central region of each character:
xMins = []
xMaxs = []
for domain in left2right:
    X = [item[0] for item in domain]
    xMins.append(min(X))
    xMaxs.append(max(X))

heightMax = max(xMaxs) - min(xMins) + 1
widthMax = int(42 / 26.0 * heightMax)  # 42/26: aspect ratio of the training-set images

for i, domain in enumerate(left2right):
    z = np.zeros(imgSplit.img0.shape)
    for x, y in domain:
        z[x, y] = 1
    # keep the common row range and each block's own column range
    z = z[min(xMins):max(xMaxs) + 1, yMins[i]:yMaxs[i] + 1]
    plt.subplot(4, 4, i + 1)
    plt.imshow(z)
plt.show()
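Note that widthMax is computed but not used in the snippet above. If the crops are meant to match the training set's aspect ratio, one option (a sketch under that assumption, not the author's code; pad_to_width and crops are hypothetical names) is to pad each crop symmetrically with background to heightMax x widthMax:

def pad_to_width(z, target_w):
    # pad a crop with zeros (background) on the left and right to a fixed width;
    # assumes the crop is no wider than target_w
    pad = max(target_w - z.shape[1], 0)
    left = pad // 2
    return np.pad(z, ((0, 0), (left, pad - left)), mode='constant')

# crops would be the z slices collected from the loop above
# padded = [pad_to_width(crop, widthMax) for crop in crops]

Every padded block then shares the same heightMax x widthMax shape, which makes it easy to resize them all to the classifier's input size.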
The final segmentation (spaces are ignored):