问题描述：将汉字图片二值化，使汉字和背景分离。
import cv2
import matplotlib.pylab as plt
import numpy as np
%matplotlib inline
# # imread
# img = cv2.imdecode(np.fromfile(u'x:/中文路径/xx.jpg', dtype=np.uint8), flags) #flags=cv2.IMREAD_COLOR...
# #imwrite
# cv2.imencode('.jpg', img)[1].tofile(u'x:/中文路径/xx.jpg')
# Paths of the images to binarize (placeholder list; fill in real paths).
img_paths = [...]

# Read each file as raw bytes and decode it in memory as a single-channel
# grayscale image (the np.fromfile + cv2.imdecode form is the one this file
# uses for paths that contain non-ASCII characters).
imgs = [
    cv2.imdecode(np.fromfile(ipath, dtype=np.uint8), cv2.IMREAD_GRAYSCALE)
    for ipath in img_paths
]

# Binarize every loaded image; `img` holds the most recent result.
for o_img in imgs:
    img = threshold(o_img)
用到的二值化函数：
def isBlackChar(img):
    """Report whether bright pixels outnumber dark pixels in *img*.

    Pixels strictly above 100 count as bright and pixels strictly below 100
    count as dark; pixels equal to 100 are counted on neither side.

    Presumably a True result means dark characters on a light background
    (characters covering the minority of the image) -- confirm with callers.

    Args:
        img: Array of pixel intensities supporting NumPy comparisons.

    Returns:
        True when the bright count is strictly greater than the dark count.
    """
    bright_count = np.sum(img > 100)
    dark_count = np.sum(img < 100)
    return bright_count > dark_count
def threshold(o_img):
    """Binarize an image with smoothing followed by adaptive thresholding.

    The input is smoothed (3x3 mean, 7x7 median, 3x3 mean), then passed
    through a mean adaptive threshold and a Gaussian adaptive threshold.
    If bright pixels do not outnumber dark pixels in the result (as judged
    by ``isBlackChar``), the result is inverted.

    Args:
        o_img: Input image array; assumed to be 8-bit single-channel
            grayscale, which is what the cv2 calls below are given by the
            loading code in this file -- confirm for other callers.

    Returns:
        The binarized image, inverted when needed so that bright pixels are
        not the minority (presumably dark characters on a light background).
    """
    # Adaptive-threshold window size: the shortest image side, forced odd.
    length = int(np.min(o_img.shape))
    if length % 2 == 0:
        length -= 1
    # cv2.adaptiveThreshold requires an odd blockSize greater than 1, so
    # images whose shortest side is below 3 fall back to the minimum of 3.
    length = max(length, 3)

    # Mean filter, then median filter, then mean filter again.
    img = cv2.blur(o_img, (3, 3))
    img = cv2.medianBlur(img, 7)
    img = cv2.blur(img, (3, 3))

    img = cv2.adaptiveThreshold(
        img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, length, 2
    )
    # NOTE(review): this second pass runs on the already-binarized output of
    # the mean pass above; confirm that applying both is intended.
    img = cv2.adaptiveThreshold(
        img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, length, 2
    )

    # Normalize polarity so that bright pixels are the majority.
    if not isBlackChar(img):
        img = 255 - img
    return img