具体的学习内容可以参考这个网址:点击打开链接
# __author__ = 'youngkl'
# -*- coding: utf-8 -*-
from PIL import Image
import hashlib
import time
import os
import math
class VectorCompare:
    """Compare two sparse vectors by cosine similarity.

    A vector (called a "concordance" here) is a dict mapping a key to a
    numeric value.
    """

    def magnitude(self, concordance):
        """Return the Euclidean norm of the values in *concordance*.

        An empty concordance has magnitude 0.0.
        """
        return math.sqrt(sum(count ** 2 for count in concordance.values()))

    def relation(self, concordance1, concordance2):
        """Return the cosine similarity of the two concordances.

        Only keys present in both concordances contribute to the dot
        product. Returns 0.0 when either concordance has zero magnitude,
        instead of raising ZeroDivisionError.
        """
        topvalue = 0
        for word, count in concordance1.items():
            if word in concordance2:
                topvalue += count * concordance2[word]

        denominator = (self.magnitude(concordance1)
                       * self.magnitude(concordance2))
        if denominator == 0:
            # A zero vector has no direction, so treat it as "no similarity".
            return 0.0
        return topvalue / denominator
def buildvector(im):
    """Return a dict mapping each pixel's sequential index to its value.

    *im* must provide ``getdata()`` returning an iterable of pixel values;
    indices start at 0 and follow the iteration order of that iterable.
    """
    return dict(enumerate(im.getdata()))
# Shared comparer used to score candidate characters.
v = VectorCompare()

# One training directory per character is expected under ./iconset/.
# Each character appears exactly once so no directory is loaded twice.
iconset = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
           'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
           'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

# Each entry is {character: [pixel vector]} for a single training image.
imageset = []
for letter in iconset:
    for img in os.listdir('./iconset/%s/' % (letter)):
        # Skip OS-generated metadata files; they are not training images.
        if img == "Thumbs.db" or img == ".DS_Store":
            continue
        temp = [buildvector(Image.open("./iconset/%s/%s" % (letter, img)))]
        imageset.append({letter: temp})
# Grey levels strictly above this value are mapped to 0; all others to 255.
THRESHOLD = 130

im = Image.open("ValidateCode (2).jpg")
im = im.convert("L")

# Inverted binarisation in a single lookup-table pass: pixels brighter than
# THRESHOLD become 0 and the rest become 255, so the later segmentation step
# can treat any non-zero pixel as part of a character.
# NOTE(review): assumes the characters are darker than the background.
im2 = im.point(lambda pix: 0 if pix > THRESHOLD else 255)

# Display the binarised image for visual inspection.
im2.show()
# Split the binarised image into vertical slices, one per character.
# A column belongs to a character when it contains at least one non-zero
# pixel; consecutive such columns form one (start, end) span, where `end`
# is the first column after the character (exclusive).
inletter = False
foundLetter = False
start = 0
end = 0
letters = []

width, height = im2.size
for col in range(width):
    inletter = any(im2.getpixel((col, row)) != 0 for row in range(height))

    if not foundLetter and inletter:
        # First inked column of a new character.
        foundLetter = True
        start = col
    elif foundLetter and not inletter:
        # First blank column after a character closes the span.
        foundLetter = False
        end = col
        letters.append((start, end))

    inletter = False

if foundLetter:
    # The last character touches the right edge of the image, so no blank
    # column ever closed it; close it at the image boundary.
    foundLetter = False
    end = width
    letters.append((start, end))

print(letters)
# Score every character slice against every training image and print the
# best (similarity, character) match for each slice.
count = 0
for letter in letters:
    # Crop the full-height column span that holds this character.
    im3 = im2.crop((letter[0], 0, letter[1], im2.size[1]))
    # The slice's vector is the same for every training image, so build it
    # once per slice rather than once per comparison.
    candidate = buildvector(im3)

    guess = []
    for image in imageset:
        for x, y in image.items():
            if len(y) != 0:
                guess.append((v.relation(y[0], candidate), x))

    if guess:
        # Highest similarity first.
        guess.sort(reverse=True)
        # Leading space kept to match the original output format.
        print(" %s" % (guess[0],))
    # With no training data there is nothing to report for this slice.
    count += 1
但是目前还有一个问题:现在的验证码已经不再是普通的单色验证码,如果有多种颜色的干扰,应该怎么处理?
另外,如果图像背景中有干扰条纹,就会影响字符的切割,这种情况又应该怎么处理?