微信截图识别
本文主要介绍利用开源的 Google OCR 工具(Tesseract,通过 pytesseract 调用)进行图片文字识别。
大体思路:
1.对图片灰度化。
2.根据灰度变化情况找到各个边界。
3.根据边界裁剪,然后识别昵称、地区、签名
#encoding=utf_8
import pytesseract
import json
import cv2
import numpy as np
from PIL import Image
def _find_row_boundaries(binary):
    """Return the row indices at which the binarised image changes section.

    ``binary`` is a 2-D array of 0/1 values.  Row ``r`` is reported when either

    * row ``r`` contains no 1-pixels and row ``r + 1`` consists only of
      1-pixels, or
    * row ``r`` has at least ``width - 6`` 1-pixels and row ``r + 1`` has
      fewer than 150 of them.

    The last row is never reported because it has no successor to compare to.
    """
    width = binary.shape[1]
    # Sum every row once and convert to plain ints so the comparisons below
    # are ordinary Python integer comparisons.
    row_sums = [int(total) for total in binary.sum(axis=1)]

    boundaries = []
    for row in range(len(row_sums) - 1):
        above, below = row_sums[row], row_sums[row + 1]
        empty_then_full = above == 0 and below == width
        full_then_sparse = above >= width - 6 and below < 150
        if empty_then_full or full_then_sparse:
            boundaries.append(row)
    return boundaries


def _find_marker_column(binary, row, fallback):
    """Locate the right-most 0 -> 1 transition on one row of ``binary``.

    The row is scanned from the right edge towards the left; the first column
    whose pixel is 1 while its left neighbour is 0 is returned.  ``fallback``
    is returned unchanged when the row contains no such transition.
    """
    line = binary[row]
    for col in range(binary.shape[1] - 1, 0, -1):
        if line[col] == 1 and line[col - 1] == 0:
            return col
    return fallback


def _ocr_region(pixels):
    """Run Tesseract (simplified Chinese model) on a grayscale pixel array."""
    return pytesseract.image_to_string(Image.fromarray(pixels), lang="chi_sim")


def weixin_rec(imgpath):
    """Extract nickname, region and signature text from a WeChat screenshot.

    The image is read as grayscale, binarised (pixels brighter than 250
    become 1, everything else 0) and split into horizontal sections using
    ``_find_row_boundaries``.  The number of boundaries found selects which
    sections are cropped and passed to Tesseract:

    * 7 boundaries: nickname, address and signature
    * 10 or 11 boundaries: nickname, address and signature (sections shifted
      down by two boundaries, marker column searched again)
    * 6 boundaries: nickname and address
    * 4 boundaries: nickname only
    * anything else: the image is rejected

    Args:
        imgpath: Path of the screenshot file.

    Returns:
        A JSON string with the keys ``"nicheng"``, ``"address"`` and
        ``"signature"`` (fields that were not recognised stay empty), or
        ``None`` when the file cannot be read or the layout is not
        recognised.  A message is printed in both failure cases.
    """
    import sys  # local import: only needed for the json py2/py3 switch below

    wx_infor = {"nicheng": "", "address": "", "signature": ""}

    # cv2.imread does not raise on a missing or undecodable file, it returns
    # None, so the failure has to be detected explicitly.
    gray = cv2.imread(imgpath, 0)
    if gray is None:
        print("Error: 没有找到文件或读取文件失败")
        return None

    # Same result as a binary threshold at 250 followed by mapping 255 -> 1,
    # but done in one vectorised step; ``gray`` itself stays untouched and is
    # what gets cropped for OCR.
    binary = (gray > 250).astype(np.uint8)
    width = gray.shape[1]

    rows = _find_row_boundaries(binary)
    print(rows)
    count = len(rows)

    # Column of the marker that ends the nickname (the failure message calls
    # it the gender marker).  It stays 0 when it cannot be located.
    marker_col = 0
    if count >= 2:
        probe_row = int(0.85 * (rows[0] + int(0.6 * (rows[1] - rows[0]))))
        marker_col = _find_marker_column(binary, probe_row, marker_col)
    else:
        print("性别标志定位失败")

    # Index into ``rows`` of the boundary that opens each section; None means
    # the section is not extracted for this layout.
    if count in (10, 11):
        name_idx, addr_idx, sig_idx = 2, 6, 7
        # These layouts carry the nickname two boundaries further down, so
        # the marker is searched again on that section; the earlier result
        # is kept if nothing is found.
        probe_row = int(0.83 * (rows[2] + int(0.65 * (rows[3] - rows[2]))))
        marker_col = _find_marker_column(binary, probe_row, marker_col)
    elif count == 7:
        name_idx, addr_idx, sig_idx = 0, 4, 5
    elif count == 6:
        name_idx, addr_idx, sig_idx = 0, 4, None
    elif count == 4:
        name_idx, addr_idx, sig_idx = None, None, None
    else:
        print("图像非添加好友截图")
        return None

    if name_idx is None:
        # NOTE(review): the 4-boundary layout uses fixed pixel rows instead of
        # the detected boundaries; this looks resolution specific - confirm
        # before relying on it for other screenshot sizes.
        name_rows = slice(310, 380)
    else:
        top = rows[name_idx]
        # Only the upper 45% of the section is used for the nickname.
        name_rows = slice(top, top + int(0.45 * (rows[name_idx + 1] - top)))

    # The crop ends a small margin (4.5% of the width) left of the marker.
    # When the marker was not found the end index is negative and therefore
    # counted from the right edge of the image.
    margin = int(0.045 * width)
    name_pixels = gray[name_rows, int(0.24 * width):marker_col - margin]
    wx_infor["nicheng"] = _ocr_region(name_pixels)

    if addr_idx is not None:
        addr_pixels = gray[rows[addr_idx]:rows[addr_idx + 1],
                           int(0.2 * width):int(0.6 * width)]
        wx_infor["address"] = _ocr_region(addr_pixels)

    if sig_idx is not None:
        sig_pixels = gray[rows[sig_idx] + 1:rows[sig_idx + 1],
                          int(0.25 * width):]
        wx_infor["signature"] = _ocr_region(sig_pixels)

    # json.dumps only accepts ``encoding`` on Python 2; keep the original
    # call there and drop the argument on Python 3, where it would raise
    # TypeError.
    if sys.version_info[0] < 3:
        return json.dumps(wx_infor, encoding="UTF-8", ensure_ascii=False)
    return json.dumps(wx_infor, ensure_ascii=False)
if __name__ == "__main__":
    # Script entry point: recognise the sample screenshot "wx666.jpg" and
    # print whatever weixin_rec returns.
    result = weixin_rec("wx666.jpg")
    print(result)
结果如下: