2021SC@SDUSC
源码配置的详情见第一篇分析
这一次分析以下代码的一部分:
val = segmentation.slidingWindowsEval(image_gray)
这行代码是pipline.py的217行的方法:
def slidingWindowsEval(image):
windows_size = 16;
stride = 1
height= image.shape[0]
t0 = time.time()
data_sets = []
for i in range(0,image.shape[1]-windows_size+1,stride):
data = image[0:height,i:i+windows_size]
data = cv2.resize(data,(23,23))
# cv2.imshow("image",data)
data = cv2.equalizeHist(data)
data = data.astype(np.float)/255
data= np.expand_dims(data,3)
data_sets.append(data)
res = model2.predict(np.array(data_sets))
print("分割",time.time() - t0)
pin = res
p = 1 - (res.T)[1]
p = f.gaussian_filter1d(np.array(p,dtype=np.float),3)
lmin = l.argrelmax(np.array(p),order = 3)[0]
interval = []
for i in range(len(lmin)-1):
interval.append(lmin[i+1]-lmin[i])
if(len(interval)>3):
mid = get_median(interval)
else:
return []
pin = np.array(pin)
res = searchOptimalCuttingPoint(image,pin,0,mid,3)
cutting_pts = res[1]
last = cutting_pts[-1] + mid
if last < image.shape[1]:
cutting_pts.append(last)
else:
cutting_pts.append(image.shape[1]-1)
name = ""
confidence =0.00
seg_block = []
for x in range(1,len(cutting_pts)):
if x != len(cutting_pts)-1 and x!=1:
section = image[0:36,cutting_pts[x-1]-2:cutting_pts[x]+2]
elif x==1:
c_head = cutting_pts[x - 1]- 2
if c_head<0:
c_head=0
c_tail = cutting_pts[x] + 2
section = image[0:36, c_head:c_tail]
elif x==len(cutting_pts)-1:
end = cutting_pts[x]
diff = image.shape[1]-end
c_head = cutting_pts[x - 1]
c_tail = cutting_pts[x]
if diff<7 :
section = image[0:36, c_head-5:c_tail+5]
else:
diff-=1
section = image[0:36, c_head - diff:c_tail + diff]
elif x==2:
section = image[0:36, cutting_pts[x - 1] - 3:cutting_pts[x-1]+ mid]
else:
section = image[0:36,cutting_pts[x-1]:cutting_pts[x]]
seg_block.append(section)
refined = refineCrop(seg_block,mid-1)
t0 = time.time()
for i,one in enumerate(refined):
res_pre = cRP.SimplePredict(one, i )
# cv2.imshow(str(i),one)
# cv2.waitKey(0)
confidence+=res_pre[0]
name+= res_pre[1]
print("字符识别",time.time() - t0)
return refined,name,confidence
slidingWindowsEval是基于滑动窗口的字符分割与识别。
里介绍一篇较为经典的论文(End-to-end text recognition with convolutional neural networks)在Cousera上Andrew Ng的Machine Learning课程中也提到了这种方法。在OpenCV的text模块中也有对应的实现。
它的主要思想是利用一个训练好的正负样本分类器来进行在图像上滑动然后产生概率response图,然后对raw response进行nms(非极大值抑制)。在确定字符bdbox,数目之后使用类似于viterbi算法来获取最佳分割路径。