微信截图

微信截图识别

本文主要介绍利用 Google 开源的 OCR 工具(Tesseract,通过 pytesseract 调用)进行图片文字识别

大体思路:
1.对图片灰度化。
2.根据灰度变化情况找到各个边界。
3.根据边界裁剪,然后识别昵称、地区、签名。

#encoding=utf_8
import pytesseract
import json
import cv2
import numpy as np
from PIL import Image
def _find_row_boundaries(binary):
    """Return the row indices that separate horizontal bands of the image.

    ``binary`` is a 2-D array of 0/1 values (1 = pixel brighter than 250).
    Row ``k`` is reported when either
      * row ``k`` has no white pixel and row ``k + 1`` is entirely white, or
      * row ``k`` is white except for at most 6 pixels and row ``k + 1``
        has fewer than 150 white pixels.
    """
    width = binary.shape[1]
    # One vectorised pass instead of re-summing every row inside the loop;
    # tolist() yields plain Python ints so the comparisons are ordinary ones.
    row_sums = binary.sum(axis=1, dtype=np.int64).tolist()
    boundaries = []
    for k in range(len(row_sums) - 1):
        here, below = row_sums[k], row_sums[k + 1]
        dark_then_white = here == 0 and below == width
        white_then_dark = here >= width - 6 and below < 150
        if dark_then_white or white_then_dark:
            boundaries.append(k)
    return boundaries


def _last_rising_edge(binary, row, fallback):
    """Scan ``row`` right-to-left for a 0 -> 1 transition.

    Returns the first column ``m`` (coming from the right edge) for which
    ``binary[row, m] == 1`` and ``binary[row, m - 1] == 0``; returns
    ``fallback`` when no such column exists.
    """
    line = binary[row]
    for m in range(binary.shape[1] - 1, 0, -1):
        if line[m] == 1 and line[m - 1] == 0:
            return m
    return fallback


def _ocr(region):
    """Run Tesseract (simplified-Chinese model) on a grayscale array crop."""
    return pytesseract.image_to_string(Image.fromarray(region), lang="chi_sim")


def weixin_rec(imgpath, debug=False):
    """Extract nickname, region and signature from a WeChat profile screenshot.

    The image is loaded as grayscale, binarised at brightness 250, split into
    horizontal bands by ``_find_row_boundaries``, and the bands selected by
    the number of boundaries found (4, 6, 7, 10 or 11) are cropped and passed
    to Tesseract.

    Args:
        imgpath: path of the screenshot file.
        debug: when true, open the nickname crop in the system image viewer.
            Defaults to off so the function can run unattended.

    Returns:
        A JSON string with the keys ``"nicheng"``, ``"address"`` and
        ``"signature"`` (fields that the detected layout does not contain
        stay ``""``), or ``None`` when the image cannot be read or the
        number of boundaries matches no known layout.

    The boundary list and any failure messages are printed to stdout.
    """
    wx_infor = {"nicheng": "", "address": "", "signature": ""}

    # cv2.imread does not raise on failure; it returns None.
    gray = cv2.imread(imgpath, 0)
    if gray is None:
        print("Error: 没有找到文件或读取文件失败")
        return None

    width = gray.shape[1]
    # 1 where the pixel is brighter than 250, else 0 (same result as a
    # binary threshold at 250 followed by mapping 255 -> 1).
    binary = (gray > 250).astype(np.uint8)

    boundaries = _find_row_boundaries(binary)
    print(boundaries)

    # Column of the right-most 0 -> 1 edge on a probe row inside the first
    # band; the nickname crop ends ``d`` columns to the left of it.
    n = 0
    try:
        first, second = boundaries[0], boundaries[1]
        probe_row = int(0.85 * (first + int(0.6 * (second - first))))
        n = _last_rising_edge(binary, probe_row, n)
    except IndexError:
        print("性别标志定位失败")
    d = int(0.045 * width)

    count = len(boundaries)
    if count in (6, 7):
        top, bottom = boundaries[0], boundaries[1]
        name_rows = slice(top, top + int(0.45 * (bottom - top)))
        addr_rows = slice(boundaries[4], boundaries[5])
        # Only the 7-boundary layout has a signature band.
        sig_rows = slice(boundaries[5] + 1, boundaries[6]) if count == 7 else None
    elif count in (10, 11):
        # Here the nickname sits in the band between boundaries 2 and 3, so
        # the edge search is repeated on a probe row inside that band.
        # If it finds nothing, the value from the first search is kept.
        top, bottom = boundaries[2], boundaries[3]
        probe_row = int(0.83 * (top + int(0.65 * (bottom - top))))
        n = _last_rising_edge(binary, probe_row, n)
        name_rows = slice(top, top + int(0.45 * (bottom - top)))
        addr_rows = slice(boundaries[6], boundaries[7])
        sig_rows = slice(boundaries[7] + 1, boundaries[8])
    elif count == 4:
        # NOTE(review): fixed pixel rows 310..380 -- presumably tuned for one
        # screenshot resolution; confirm before using other sizes.
        name_rows = slice(310, 380)
        addr_rows = None
        sig_rows = None
    else:
        print("图像非添加好友截图")
        return None

    # When no edge was found n is still 0, so the end index is -d and the
    # crop simply stops d columns before the right border.
    name_region = gray[name_rows, int(0.24 * width):n - d]
    wx_infor["nicheng"] = _ocr(name_region)
    if addr_rows is not None:
        wx_infor["address"] = _ocr(
            gray[addr_rows, int(0.2 * width):int(0.6 * width)])
    if sig_rows is not None:
        wx_infor["signature"] = _ocr(gray[sig_rows, int(0.25 * width):])

    if debug:
        Image.fromarray(name_region).show()

    # UTF-8 is already the default input encoding of json.dumps on Python 2,
    # and Python 3 rejects the ``encoding`` keyword, so it is not passed.
    return json.dumps(wx_infor, ensure_ascii=False)
if __name__ == "__main__":
    # Demo run on a sample screenshot expected next to the script.
    # The parenthesised single-argument print works on Python 2 and 3 alike.
    result = weixin_rec("wx666.jpg")
    print(result)

这里写图片描述

结果如下:
这里写图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值