在这篇博文中手把手教你如何去分割验证码,然后进行识别。
一:下载验证码
验证码分析:图片上有折线,验证码有数字,有英文字母大小写,分类的时候需要更多的样本;验证码的字母是彩色的,图片上有雪花等噪点,因此识别该验证码难度较大。
二:二值化和降噪:
三: 切割:
四:分类:
五: 测试识别率
六:总结:
综合识别率在70%左右,对于这个识别率我觉得还是挺高的,因为这个验证码的识别难度还是很大
代码:
一. 下载图片:
# -*- coding: utf-8 -*-
import requests


def spider():
    """Download 100 captcha samples into ./1__get_image/ as 1.png .. 100.png.

    Each GET to the captcha endpoint returns a freshly generated image, so
    hitting the same URL repeatedly yields 100 distinct samples.
    """
    url = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"  # captcha endpoint (redacted in the post)
    for i in range(1, 101):
        print("正在下载的张数是:", i)
        # .content is the raw image bytes; write in binary mode.
        with open("./1__get_image/{}.png".format(i), "wb") as f:
            f.write(requests.get(url).content)


spider()
二: 验证码二值化和降噪:
# -*- coding: utf-8 -*-
import sys
import os
from PIL import Image, ImageDraw

# Global binary map: (x, y) -> 0/1 for the most recently thresholded image.
# Shared by twoValue / clearNoise / saveImage below.
t2val = {}


def twoValue(image, G):
    """Binarise *image* into the global ``t2val`` dict.

    :param image: PIL image already converted to mode 'L' (greyscale),
        so ``getpixel`` returns a single intensity value.
    :param G: greyscale threshold; pixels brighter than G map to 1,
        the rest (the ink) to 0.
    """
    for y in range(0, image.size[1]):
        for x in range(0, image.size[0]):
            g = image.getpixel((x, y))
            if g > G:
                t2val[(x, y)] = 1
            else:
                t2val[(x, y)] = 0
def clearNoise(image, N, Z):
    """Remove salt-and-pepper noise from the binary map in ``t2val``.

    Compare each interior pixel's value with its 8 neighbours; a pixel whose
    value agrees with fewer than N neighbours is treated as noise and forced
    to 1 (background). The sweep is repeated Z times.

    :param image: PIL image; only ``image.size`` is read here.
    :param N: minimum number of agreeing neighbours for a pixel to survive.
    :param Z: number of denoising passes.
    """
    for _ in range(0, Z):
        # Pin the two extreme corners to background each pass.
        t2val[(0, 0)] = 1
        t2val[(image.size[0] - 1, image.size[1] - 1)] = 1
        for x in range(1, image.size[0] - 1):
            for y in range(1, image.size[1] - 1):
                nearDots = 0
                L = t2val[(x, y)]
                # Count 8-neighbourhood pixels that share this pixel's value.
                for dx in (-1, 0, 1):
                    for dy in (-1, 0, 1):
                        if dx == 0 and dy == 0:
                            continue
                        if L == t2val[(x + dx, y + dy)]:
                            nearDots += 1
                # NOTE(review): the published source was truncated here
                # ("if nearDots"); the conventional form of this algorithm
                # clears pixels with fewer than N agreeing neighbours —
                # confirm against the author's original.
                if nearDots < N:
                    t2val[(x, y)] = 1
def saveImage(filename, size):
    """Write the global ``t2val`` binary map out as a 1-bit image.

    :param filename: destination path.
    :param size: (width, height) of the map, normally ``image.size``.
    """
    image = Image.new("1", size)
    draw = ImageDraw.Draw(image)
    for x in range(0, size[0]):
        for y in range(0, size[1]):
            draw.point((x, y), t2val[(x, y)])
    image.save(filename)


# Batch-process the 100 downloaded captchas:
# greyscale -> threshold at 198 -> one denoise pass -> save.
for i in range(1, 101):
    path = "1__get_image/" + str(i) + ".png"
    image = Image.open(path)
    image = image.convert('L')
    twoValue(image, 198)
    clearNoise(image, 3, 1)
    path1 = "2__erzhihua_jiangzao/" + str(i) + ".jpg"
    saveImage(path1, image.size)
三: 切割验证码:
# -*- coding: utf-8 -*-
from PIL import Image


def smartSliceImg(img, outDir, ii, count=4, p_w=3):
    """Slice a binarised captcha image into *count* character tiles.

    Instead of cutting at fixed widths, each nominal cut position is nudged
    to the column with the fewest black pixels within +/- p_w columns, so
    cuts tend to fall in the gaps between characters.

    :param img: binarised PIL image; black (ink) pixels compare == 0.
    :param outDir: output directory for the tiles.
    :param ii: index of the source captcha, used in the output file names.
    :param count: number of characters expected in the captcha.
    :param p_w: half-width of the search window around each nominal cut.
    """
    w, h = img.size
    pixdata = img.load()
    eachWidth = int(w / count)
    beforeX = 0
    for i in range(count):
        allBCount = []
        nextXOri = (i + 1) * eachWidth
        for x in range(nextXOri - p_w, nextXOri + p_w):
            # Clamp the probe column to the image bounds.
            if x >= w:
                x = w - 1
            if x < 0:
                x = 0
            b_count = 0
            for y in range(h):
                if pixdata[x, y] == 0:
                    b_count += 1
            allBCount.append({'x_pos': x, 'count': b_count})
        # Cut at the candidate column with the fewest black pixels
        # (min() instead of a full sort — only the best column is needed).
        nextX = min(allBCount, key=lambda e: e['count'])['x_pos']
        box = (beforeX, 0, nextX, h)
        img.crop(box).save(outDir + str(ii) + "_" + str(i) + ".png")
        beforeX = nextX


for ii in range(1, 101):
    path = "2__erzhihua_jiangzao/" + str(ii) + ".jpg"
    img = Image.open(path)
    outDir = '3__qiege/'
    smartSliceImg(img, outDir, ii, count=4, p_w=3)
四: 训练:
# -*- coding: utf-8 -*-
import numpy as np
import os
import time
from PIL import Image
# NOTE(review): sklearn.externals.joblib was removed in scikit-learn >= 0.23;
# on modern installs switch to `import joblib`.
from sklearn.externals import joblib
from sklearn.neighbors import KNeighborsClassifier


def load_dataset():
    """Load the hand-labelled character tiles from fenlei/<char>/.

    Each tile is a 25x30 greyscale image flattened into a 750-element vector;
    the label is the directory name (a single character).

    :return: (X, y) as numpy arrays of samples and labels.
    """
    X = []
    y = []
    # NOTE(review): the published source iterated "...ABVDEF..." (V in place
    # of C) while the evaluation below uses "...ABCDEF..."; the V looks like
    # a typo, so the two charsets are unified here — confirm the directory
    # names under fenlei/ match.
    for ch in "23456789ABCDEFGHKMNPRSTUVWXYZ":
        target_path = "fenlei/" + ch
        print(target_path)
        for title in os.listdir(target_path):
            pix = np.asarray(Image.open(os.path.join(target_path, title)).convert('L'))
            X.append(pix.reshape(25 * 30))
            y.append(target_path.split('/')[-1])
    return np.asarray(X), np.asarray(y)


def check_everyone(model):
    """Print the absolute and relative accuracy of *model* on part/<char>/ tiles."""
    pre_list = []
    y_list = []
    for ch in "23456789ABCDEFGHKMNPRSTUVWXYZ":
        part_path = "part/" + ch
        for title in os.listdir(part_path):
            pix = np.asarray(Image.open(os.path.join(part_path, title)).convert('L'))
            pre_list.append(pix.reshape(25 * 30))
            y_list.append(part_path.split('/')[-1])
    pre_list = np.asarray(pre_list)
    y_list = np.asarray(y_list)
    result_list = model.predict(pre_list)
    # The original looped comparing each element against np.bool(True);
    # np.bool was removed from modern NumPy — summing the boolean mask is
    # equivalent and concise.
    acc = int(np.sum(result_list == y_list))
    print(acc, acc / len(result_list))


X, y = load_dataset()
knn = KNeighborsClassifier()
knn.fit(X, y)
joblib.dump(knn, 'yipai.model')
check_everyone(knn)
五:模型测试:
# -*- coding: utf-8 -*-
import numpy as np
from PIL import Image
# NOTE(review): sklearn.externals.joblib was removed in scikit-learn >= 0.23;
# on modern installs switch to `import joblib`.
from sklearn.externals import joblib
import os

# Ground-truth answers: the downloaded captcha file names minus extension.
target_path = "1__get_image/"
source_result = []
for title in os.listdir(target_path):
    source_result.append(title.replace('.png', ''))


def predict(model):
    """Predict the 4-character string of each of the 100 sliced captchas.

    Reads the tiles part1/<q>_<i>.png for q in 1..100, i in 0..3 and joins
    the four single-character predictions per captcha.

    :param model: fitted classifier whose predict() returns character labels.
    :return: list of 100 predicted 4-character strings.
    """
    predict_result = []
    for q in range(1, 101):
        pre_list = []
        for i in range(0, 4):
            part_path = "part1/" + str(q) + "_" + str(i) + ".png"
            # NOTE(review): training applied .convert('L') before reshape;
            # the tiles here are assumed to already be single-channel 25x30,
            # otherwise reshape(750) raises — confirm the slicing output.
            pix = np.asarray(Image.open(part_path))
            pre_list.append(pix.reshape(25 * 30))
        result_list = model.predict(np.asarray(pre_list))
        print(result_list, q)
        predict_result.append(str(result_list[0] + result_list[1] + result_list[2] + result_list[3]))
    return predict_result


model = joblib.load('yipai.model')
predict_result = predict(model)