# 使用python简易逻辑识别数字验证码 (Recognizing numeric CAPTCHAs in Python with simple logic)
import requests
import sys, os
from PIL import Image, ImageDraw
import pytesseract
import numpy as np
import re
# Point pytesseract at a hard-coded location of the Tesseract executable.
pytesseract.pytesseract.tesseract_cmd = r'D:\ORC\tesseract.exe'
# Binarized image: maps (x, y) -> 0 or 1 (0 is black, 1 is white).
# Filled by twoValue and then read and modified in place by the other helpers.
t2val = {}
def twoValue(image, G, pixels=None):
    """Binarize an image into a ``{(x, y): 0 or 1}`` mapping.

    Every pixel whose value is strictly greater than ``G`` is stored as 1;
    every other pixel is stored as 0. The rest of this file treats 1 as
    white and 0 as black.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and
            ``getpixel((x, y))`` returning a value comparable with ``G``
            (the script passes a PIL image converted to mode "L").
        G: Binarization threshold.
        pixels: Optional dict to fill. When omitted, the module-level
            ``t2val`` mapping is filled, which is the original behaviour.
    """
    # Only fall back to the shared global when no mapping was supplied;
    # an explicit (possibly empty) dict must be honoured.
    grid = t2val if pixels is None else pixels
    width, height = image.size[0], image.size[1]
    for y in range(height):
        for x in range(width):
            grid[(x, y)] = 1 if image.getpixel((x, y)) > G else 0
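# Noise removal: compare the binarized value of a point A with the values of
# its 8 surrounding points. Given a value N (0 < N < 8), A is treated as noise
# when fewer than N of the 8 neighbours have the same value as A.
# G: Integer  binarization threshold (a parameter of twoValue, not of clearNoise)
# N: Integer  noise-removal rate, 0 < N < 8
# Z: Integer  number of noise-removal passes
# Output
#   The result is written to t2val in place; clearNoise itself returns nothing
#   (no 0 = success / 1 = failure status code is produced).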
def clearNoise(image, N, Z, pixels=None):
    """Remove isolated noise pixels from the binarized pixel map, in place.

    For every interior pixel, the number of its 8 neighbours that hold the
    same value is counted; when that count is below ``N`` the pixel is set
    to 1 (white). Pixels on the outer border are never examined. At the
    start of each pass the top-left and bottom-right corner pixels are
    forced to 1.

    The map is updated while it is being scanned (x outer, y inner), so a
    pixel changed earlier in a pass influences the pixels visited after it.

    Args:
        image: Object exposing ``size`` as ``(width, height)``.
        N: Minimum number of equal neighbours a pixel needs to be kept.
        Z: Maximum number of passes over the map.
        pixels: Optional ``{(x, y): 0 or 1}`` dict to clean. When omitted,
            the module-level ``t2val`` mapping is used, which is the
            original behaviour.
    """
    grid = t2val if pixels is None else pixels
    width, height = image.size[0], image.size[1]
    neighbours = (
        (-1, -1), (-1, 0), (-1, 1),
        (0, -1), (0, 1),
        (1, -1), (1, 0), (1, 1),
    )
    for _ in range(Z):
        grid[(0, 0)] = 1
        grid[(width - 1, height - 1)] = 1
        changed = False
        for x in range(1, width - 1):
            for y in range(1, height - 1):
                value = grid[(x, y)]
                same = sum(
                    1 for dx, dy in neighbours
                    if grid[(x + dx, y + dy)] == value
                )
                if same < N and value != 1:
                    grid[(x, y)] = 1
                    changed = True
        # A pass that changed nothing leaves the map in a state that every
        # further pass would reproduce exactly, so the remaining passes
        # can be skipped without affecting the result.
        if not changed:
            break
def grx1(image, w, h, l, pixels=None):
    """Erase thin horizontal interference lines from the binarized pixel map.

    In the map 0 is black and 1 is white. For every black pixel ``(x, y)``
    with ``1 <= x < w - 1`` and ``1 <= y < h - 5``, an ``l``-wide, 2-high
    window whose top-left corner is that pixel is inspected. The window is
    treated as a piece of a horizontal line, and painted white, when

    * at least ``2*l - 1`` of its ``2*l`` pixels are black, and
    * at least ``2*l - 2`` of the ``2*l`` pixels in the rows directly above
      and directly below the window are white.

    The map is modified in place while it is scanned.

    Args:
        image: Unused; kept so existing callers keep working.
        w: Width of the region to scan.
        h: Height of the region to scan.
        l: Width, in pixels, of the inspected window.
        pixels: Optional ``{(x, y): 0 or 1}`` dict to process. When
            omitted, the module-level ``t2val`` mapping is used, which is
            the original behaviour.
    """
    grid = t2val if pixels is None else pixels
    for x in range(1, w - 1):
        for y in range(1, h - 5):
            if grid.get((x, y)) != 0:
                continue
            # Positions outside the map count as neither black nor white.
            # The original indexed them directly and raised KeyError for a
            # black pixel closer than ``l`` columns to the right edge.
            black = sum(
                1
                for i in range(l)
                for j in (0, 1)
                if grid.get((x + i, y + j)) == 0
            )
            white = sum(
                1
                for i in range(l)
                for j in (-1, 2)
                if grid.get((x + i, y + j)) == 1
            )
            if black > 2 * l - 2 and white > 2 * l - 3:
                # Clear the whole inspected window. The original cleared a
                # hard-coded 3 columns, which only matched l == 3.
                for i in range(l):
                    for j in (0, 1):
                        grid[(x + i, y + j)] = 1
    # NOTE: the original carried a commented-out vertical-line variant of
    # the same test with the x and y offsets swapped; it was never executed.
def saveImage(filename, size):
    """Save a cropped copy of the binarized pixel map as a 1-bit image.

    A 120x46 image of mode "1" is created, and every entry of the
    module-level ``t2val`` map that lies at least 40 columns from the left
    and right edges and 2 rows from the top and bottom edges of ``size``
    is drawn into it, shifted so that the crop starts at (0, 0).

    Args:
        filename: Path the image is written to.
        size: ``(width, height)`` of the pixel map stored in ``t2val``.
    """
    side_margin = 40
    vertical_margin = 2
    canvas = Image.new("1", (120, 46))
    pen = ImageDraw.Draw(canvas)
    last_col = size[0] - side_margin
    last_row = size[1] - vertical_margin
    for col in range(side_margin, last_col):
        for row in range(vertical_margin, last_row):
            target = (col - side_margin, row - vertical_margin)
            pen.point(target, t2val[(col, row)])
    canvas.save(filename)
# Counts the OCR results that had the expected 5-character length.
# The original incremented ``n`` without ever initialising it, which raised
# NameError on the first 5-character result.
n = 0
# Keep fetching and solving captchas until the server accepts one.
while True:
    urls = 'http://info.vecc.org.cn/ve/kaptcha.jpg'
    # The JSESSIONID obtained with the captcha image is re-sent with the
    # form submission below.
    session = requests.Session()
    html = session.get(urls)
    cookie = {'JSESSIONID': session.cookies.get('JSESSIONID')}
    with open('1.jpg', 'wb') as fw:
        fw.write(html.content)

    # Pre-process: grayscale -> binarize -> remove noise -> remove lines.
    image = Image.open('1.jpg').convert("L")
    twoValue(image, 120)
    clearNoise(image, 4, 10)
    # grx1(image,200,50,4)
    grx1(image, 200, 50, 3)
    path1 = "3.jpg"
    saveImage(path1, image.size)

    # OCR the cleaned image and keep only word characters.
    text = pytesseract.image_to_string(Image.open('3.jpg')).lower()
    text = text.strip()
    text = text.replace(' ', '')
    text = text.replace('\n', '')
    text = re.sub(r'[^\w\s]', '', text)

    # NOTE(review): the original indentation was lost; the submission is
    # assumed to be nested under the length check -- confirm.
    if len(text) == 5:
        n += 1
        # Keep a copy of the cleaned image named after the recognised text.
        saveImage(text + '.png', image.size)
        data = {
            'vin': 'qe',
            'fdjh': '123456',
            'vaild': text
        }
        a = requests.post('http://info.vecc.org.cn/ve/vin/index', data, verify=False, cookies=cookie).content
        # The byte string is the escaped form of "验证码错误！"
        # ("captcha incorrect!"); its absence ends the loop.
        if b'\\u9A8C\\u8BC1\\u7801\\u9519\\u8BEF\\uFF01' not in a:
            print('pass')
            break