微信截图

最新推荐文章于 2024-05-15 15:55:55 发布

daqinjun

最新推荐文章于 2024-05-15 15:55:55 发布

阅读量732

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/daqinjun/article/details/79025124

版权

python 专栏收录该内容

16 篇文章 0 订阅

订阅专栏

微信截图识别

本文主要介绍利用开源的 Google OCR 工具 Tesseract（通过 pytesseract 调用）进行图片文字识别。

大体思路：
1.对图片灰度化（并二值化）
2.根据灰度变化情况找到各个边界。
3.根据边界裁剪，然后识别昵称、地区、签名

#encoding=utf_8
import pytesseract
import json
import cv2
import numpy as np
from PIL import Image
def _find_marker_column(binary, row):
    """Return the right-most 0 -> 1 transition column on ``row``.

    The row is scanned from the right edge towards the left; the first column
    ``col`` with ``binary[row, col] == 1`` and ``binary[row, col - 1] == 0``
    is returned.  ``None`` is returned when the row has no such transition.
    """
    for col in range(binary.shape[1] - 1, 0, -1):
        if binary[row, col] == 1 and binary[row, col - 1] == 0:
            return col
    return None


def _upper_rows(top, bottom):
    """Return the row range covering the upper 45% of the band ``top..bottom``."""
    return (top, top + int(0.45 * (bottom - top)))


def weixin_rec(imgpath, show_crops=False):
    """OCR the nickname, region and signature from a WeChat screenshot.

    The image is read as grayscale and binarised (pixels brighter than 250
    become 1, all others 0).  A row ``k`` is recorded as a horizontal boundary
    when either

    * row ``k`` is entirely 0 and row ``k + 1`` is entirely 1, or
    * row ``k`` has at most 6 zero pixels and row ``k + 1`` has fewer than
      150 one pixels.

    The number of boundaries found (4, 6, 7, 10 or 11) selects which boundary
    pairs delimit the nickname, region and signature bands.  Each band is
    cropped from the grayscale image and passed to Tesseract with the
    ``chi_sim`` language model.

    Args:
        imgpath: Path of the screenshot to analyse.
        show_crops: When true, open every cropped region in the default image
            viewer before it is recognised (debugging aid, off by default).

    Returns:
        A JSON string with the keys ``"nicheng"``, ``"address"`` and
        ``"signature"``; fields the detected layout does not provide stay
        empty strings.  ``None`` is returned when the image cannot be read or
        when the boundary count matches no known layout.
    """
    wx_infor = {"nicheng": "", "address": "", "signature": ""}

    # cv2.imread signals failure by returning None rather than raising.
    gray = cv2.imread(imgpath, 0)
    if gray is None:
        print("Error: 没有找到文件或读取文件失败")
        return None

    # Binarise straight to 0/1 so that a row sum is the count of bright pixels.
    _, binary = cv2.threshold(gray, 250, 1, cv2.THRESH_BINARY)
    height, width = binary.shape
    row_sums = binary.sum(axis=1).tolist()

    # NOTE(review): 6 and 150 are absolute pixel counts, presumably tuned for
    # one screenshot width -- confirm before using other resolutions.
    boundaries = [
        k for k in range(height - 1)
        if (row_sums[k] == 0 and row_sums[k + 1] == width)
        or (row_sums[k] >= width - 6 and row_sums[k + 1] < 150)
    ]
    print(boundaries)

    # Locate the marker that ends the nickname (the "gender marker" named in
    # the failure message), scanning a row inside the first band.
    marker_col = None
    if len(boundaries) >= 2:
        top, bottom = boundaries[0], boundaries[1]
        marker_col = _find_marker_column(
            binary, int(0.85 * (top + int(0.6 * (bottom - top)))))
    else:
        print("性别标志定位失败")

    count = len(boundaries)
    address_rows = None
    signature_rows = None
    if count == 7:
        name_rows = _upper_rows(boundaries[0], boundaries[1])
        address_rows = (boundaries[4], boundaries[5])
        signature_rows = (boundaries[5] + 1, boundaries[6])
    elif count in (10, 11):
        # In these layouts the nickname sits in the band between boundaries
        # 2 and 3, so the marker is searched again inside that band.  The
        # earlier result is kept when this second scan finds nothing.
        top, bottom = boundaries[2], boundaries[3]
        rescanned = _find_marker_column(
            binary, int(0.83 * (top + int(0.65 * (bottom - top)))))
        if rescanned is not None:
            marker_col = rescanned
        name_rows = _upper_rows(top, bottom)
        address_rows = (boundaries[6], boundaries[7])
        signature_rows = (boundaries[7] + 1, boundaries[8])
    elif count == 6:
        name_rows = _upper_rows(boundaries[0], boundaries[1])
        address_rows = (boundaries[4], boundaries[5])
    elif count == 4:
        # NOTE(review): fixed pixel rows, presumably tuned for a single
        # screenshot resolution -- confirm.
        name_rows = (310, 380)
    else:
        print("图像非添加好友截图")
        return None

    # The nickname crop stops `margin` pixels left of the marker; without a
    # detected marker it stops `margin` pixels left of the right edge.
    margin = int(0.045 * width)
    if marker_col is not None:
        name_right = marker_col - margin
    else:
        name_right = width - margin

    regions = (
        ("nicheng", name_rows, (int(0.24 * width), name_right)),
        ("address", address_rows, (int(0.2 * width), int(0.6 * width))),
        ("signature", signature_rows, (int(0.25 * width), None)),
    )
    for key, rows, cols in regions:
        if rows is None:
            continue
        crop = Image.fromarray(gray[rows[0]:rows[1], cols[0]:cols[1]])
        if show_crops:
            crop.show()
        wx_infor[key] = pytesseract.image_to_string(crop, lang="chi_sim")

    # ensure_ascii=False keeps the recognised Chinese text readable instead of
    # emitting \uXXXX escapes.
    return json.dumps(wx_infor, ensure_ascii=False)
if __name__ == "__main__":
    # Demo run: recognise the sample screenshot and print the JSON result.
    result = weixin_rec("wx666.jpg")
    print(result)

这里写图片描述

结果如下：
这里写图片描述

daqinjun

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
微信截图

微信截图识别 本文主要介绍利用开源的 Google OCR 工具进行图片文字识别。大体思路： 1.对图片灰度化 2.根据灰度变化情况找到各个边界。 3.根据边界裁剪，然后识别昵称、地区、签名 #encoding=utf_8 import pytesseract import json import cv2 import numpy as np from PIL import Image
复制链接

扫一扫