手写数字识别练习
最近比较闲,想学习下文字识别。希望使用卷积神经网络来实现对手写数字照片文字定位和识别。
数据来源
训练集:keras.datasets.mnist 手写字符数据集。
测试集:自己手写π的前100多位。
图片需要进行字符分割和文本行定位,再通过训练好的CNN进行识别。
数据预处理
照片虽然是灰色的但不是灰度图片,需要读取图片后灰度化。
# Read the photo from disk. cv2.imread does not raise on failure; it returns
# None (missing file, unreadable format, or -- as the original note warns --
# a path containing Chinese characters), so fail early with a clear error
# instead of a cryptic cvtColor assertion further down.
img = cv2.imread('E:/image/image0.jpg')
if img is None:
    raise FileNotFoundError('E:/image/image0.jpg')
# The photo looks gray but is not a single-channel image; convert it to grayscale.
image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
全局二值化
# Global binarization: threshold the whole image at (mean gray level - 45),
# inverted so that pixels at or below the threshold become 255 and the rest 0.
h, w = np.shape(image)
# Sum all pixels in one vectorized call with an explicit 64-bit accumulator.
# Adding uint8 pixels one by one to a Python int can leave the running total
# as uint8 under NumPy's newer promotion rules, where it wraps around at 255
# and yields a wrong mean; the per-pixel Python loop is also very slow.
sum1 = int(image.sum(dtype=np.int64))
mean = sum1 / h / w
retval, image_2 = cv2.threshold(image, mean - 45, 255, cv2.THRESH_BINARY_INV)
全局二值化的效果不好:图片有水印,一些边缘地带的颜色比较深,会被二值化为255;部分文字比较浅,会被二值化为0。
局部二值化
将图片分割开在每个区域中进行二值化,二值化后再组合在一起。解决边缘过黑和水印问题。
def Local_binarization(image, filter=(100,100)): # 局部二值化,filter局部视野框大小100*100
h, w = np.shape(image)
h_remainder = h % filter[0] # 取余图片不能被视野框整除
w_remainder = w % filter[1]
h_num = h // filter[0] # 视野框在高上会有几个
w_num = w // filter[1] # 视野框在宽上会有几个
if h_remainder == 0:
h_num_r = 0 # 无多余视野框
h_rem = False # 不需要增加视野框
elif h_remainder < filter[0]/2: # 确定边缘的视野框大小
filter_hr = filter[0] + h_remainder # 高几个像素
h_num_r = -1 # 无多余视野框
h_rem = True
else:
filter_hr = h_remainder
h_num_r = 0 # 有多余视野框
h_rem = True
if w_remainder == 0:
h_num_r =0 # 无多余视野框
w_rem = False # 不需要增加视野框
elif w_remainder < filter[1]/2:
filter_wr = filter[1] + w_remainder # 宽几个像素
w_num_r = -1
w_rem = True
else:
filter_wr = w_remainder
w_num_r = 0
w_rem = True
#print(h_remainder,w_remainder,w_num_r)
#print(filter_hr,filter_wr)
local_image = np.zeros(filter,dtype=np.uint8)
local_image_h = np.zeros((filter_hr,filter[1]), dtype=np.uint8)
local_image_w = np.zeros((filter[0],filter_wr), dtype=np.uint8)
local_image_hw = np.zeros((filter_hr,filter_wr), dtype=np.uint8)
for i in range(h_num + h_num_r):
for t in range(w_num + w_num_r): # 先处理正常的视野框
x0 = 0
y0 = 0
for x in range(i*filter[0],(i+1)*filter[0]):
for y in range(t*filter[1],(t+1)*filter[1]):
local_image[x0,y0] = image[x,y]
y0 += 1
y0 = 0
x0 += 1
#plt.imshow(local_image)
#plt.show()
#cv2.imshow('local', local_image)
#cv2.waitKey(0) # 等待按键按下
#cv2.destroyAllWindows() # 清除所有窗口
#print(local_image.max(),local_image.min()) # 查看有文字和无文字部分的灰度区别
mid = (int(local_image.max()) + int(local_image.min()))/2
# 需要注意的是图像像素值是ubyte类型,ubyte类型数据范围为0~255,若做运算出现负值或超出255,则会抛出异常
dif = local_image.max() - local_image.min()
if dif <= 25:
local_image[0:filter[0], 0:filter[1]] = 0
else:
retval, local_image = cv2.threshold(local_image, mid+5, 255, cv2.THRESH_BINARY_INV)
black_num = 0
for i0 in range(filter[0]): # 检测白色的水印文字
for t0 in range(filter[1]):
if local_image[i0,t0] == 255: # 水印文字二值化背景为黑色,文字为白色
black_num += 1
if black_num >= filter[0]*filter[1]/2:
local_image[0:filter[0],0:filter[1]] = 0 # 完全去除水印
#cv2.imshow('local', local_image)
#cv2.waitKey(0) # 等待按键按下
#cv2.destroyAllWindows() # 清除所有窗口
#plt.imshow(local_image)
#plt.show()
x0 = 0
y0 = 0
for x in range(i*filter[0],(i