字符识别

#coding:utf-8

import os
import requests
from PIL import Image
import math,time

def imagesget():
    """Download 100 sample captcha images into the ``images`` directory.

    Each response body from the captcha endpoint is written unmodified to
    ``images/<n>.jpeg`` for n = 0..99.  The directory is created when it is
    missing; an existing directory is reused (files with the same name are
    overwritten), so the function can safely be run more than once.
    """
    # os.mkdir raises OSError when the directory already exists, so only
    # create it when it is absent.
    if not os.path.isdir('images'):
        os.mkdir('images')
    for count in range(100):
        img=requests.get('http://wsxk.hust.edu.cn/randomImage.action').content
        with open('images/%s.jpeg'%count,'wb') as imgfile:
            imgfile.write(img)

def convert_image(image):
    """Return a black-and-white copy of ``image``.

    The input is converted to grayscale (mode ``'L'``); every pixel whose
    gray level is below 120 becomes black (0) and all other pixels stay
    white (255).  The result is also written to ``L.png`` so the effect of
    the threshold can be inspected.
    """
    gray = image.convert('L')
    width, height = gray.size
    # Start from an all-white canvas and paint only the dark pixels.
    binary = Image.new('L', gray.size, 255)
    for col in range(width):
        for row in range(height):
            if gray.getpixel((col, row)) >= 120:
                continue
            binary.putpixel((col, row), 0)
    # Keep a copy on disk to check the thresholding visually.
    binary.save('L.png')
    return binary

def cut_image(image):
    """Split a binarized image into one sub-image per character.

    Columns are scanned left to right.  A character starts at the first
    column that contains a black (0) pixel and ends at the next column that
    contains none.  A character that is still open when the right edge of
    the image is reached is closed at the image width, so glyphs touching
    the border are not lost.

    ``image`` must provide ``size``, ``getpixel((x, y))`` and ``crop(box)``.
    Returns a list of crops, each spanning the full image height, in
    left-to-right order; the list is empty when no black pixel is present.
    """
    width, height = image.size
    spans = []
    start = None  # left column of the character being scanned, if any
    for x in range(width):
        has_ink = any(image.getpixel((x, y)) == 0 for y in range(height))
        if has_ink and start is None:
            start = x
        elif not has_ink and start is not None:
            # First blank column after a run of inked columns: the crop
            # box's right bound is exclusive, so x itself is the end.
            spans.append((start, x))
            start = None
    if start is not None:
        # The last character runs up to the right border of the image.
        spans.append((start, width))
    return [image.crop((left, 0, right, height)) for left, right in spans]

def buildvector(image):
    """Flatten an image into a vector.

    Returns a dict that maps each position in ``image.getdata()`` (starting
    at 0) to the value found at that position.
    """
    return dict(enumerate(image.getdata()))


class CaptchaRecognize:
    """Recognise digit captchas by comparing glyphs with template images.

    Template images are read from ``./icon/<digit>/`` for the digits 0-9.
    Every glyph cut out of a captcha is turned into a vector and compared
    with the templates of each digit using cosine similarity; the digit
    with the highest average similarity wins.
    """

    def __init__(self):
        self.letters=['0','1','2','3','4','5','6','7','8','9']
        self.loadSet()

    def loadSet(self):
        """Load the template vectors of every digit into ``self.imgset``.

        ``self.imgset`` becomes a list with one single-key dict per digit:
        ``{digit: [vector, ...]}``, one vector per file in the digit folder.
        """
        self.imgset=[]
        for letter in self.letters:
            temp=[]
            for name in os.listdir('./icon/%s'%(letter)):
                temp.append(buildvector(Image.open('./icon/%s/%s'%(letter,name))))
            self.imgset.append({letter:temp})

    def magnitude(self,concordance):
        """Return the Euclidean length of the vector ``concordance``.

        ``concordance`` is a dict of index -> numeric value.
        """
        return math.sqrt(sum(count ** 2 for count in concordance.values()))

    def relation(self,concordance1, concordance2):
        """Return the cosine of the angle between two vectors.

        Both arguments are dicts of index -> numeric value; only indices
        present in both contribute to the dot product.  When either vector
        has zero length the cosine is undefined and 0.0 is returned instead
        of raising ZeroDivisionError.
        """
        topvalue = 0
        for word, count in concordance1.items():
            if word in concordance2:
                topvalue += count * concordance2[word]
        denominator = self.magnitude(concordance1) * self.magnitude(concordance2)
        if denominator == 0:
            return 0.0
        return topvalue / denominator

    def recognise(self,image):
        """Recognise the characters contained in a captcha image.

        The image is binarized with ``convert_image`` and split with
        ``cut_image``.  Returns one ``(similarity, digit)`` tuple per glyph,
        in left-to-right order, where ``similarity`` is the average cosine
        similarity against the templates of the winning digit.

        Raises ValueError when no digit has any template image.
        """
        image=convert_image(image)  # binarize
        vectors=[buildvector(img) for img in cut_image(image)]  # one per glyph
        result=[]
        for vector in vectors:
            guess=[]
            for entry in self.imgset:
                for letter,temp in entry.items():
                    if not temp:
                        # No templates for this digit: nothing to average.
                        continue
                    relevance=sum(self.relation(vector,img) for img in temp)/len(temp)
                    guess.append((relevance,letter))
            if not guess:
                raise ValueError('no template images loaded from ./icon')
            # Highest similarity wins; ties resolve to the larger digit,
            # exactly as a descending sort of the tuples would.
            result.append(max(guess))
        return result

if __name__=='__main__':
    imageRecognize=CaptchaRecognize()
    # Path of the captcha image to recognise.
    image=Image.open('3.png')
    # Parenthesised single-argument print is valid on Python 2 and 3 alike.
    print(image.mode)

    result=imageRecognize.recognise(image)

    # Each result entry is a (similarity, digit) pair; keep only the digits.
    string=[item[1] for item in result]
    print(string)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值