识别网页验证码

使用 Python 的简易逻辑识别数字验证码

 

 

import requests
import sys, os
from PIL import Image, ImageDraw
import pytesseract
import numpy as np
import re

# Tell pytesseract where the Tesseract OCR executable is; this is a
# machine-specific Windows path and has to be adjusted on other machines.
pytesseract.pytesseract.tesseract_cmd = r'D:\ORC\tesseract.exe'

# Binarized pixel map shared by the functions in this file:
# (x, y) -> 0 for black, 1 for white.
t2val = {}

def twoValue(image, G):
    """Binarize ``image`` into the global ``t2val`` map.

    Every pixel whose value is greater than ``G`` is stored as 1, every other
    pixel as 0.  Entries are keyed by ``(x, y)`` and written row by row.

    Args:
        image: Object exposing ``size`` as (width, height) and
            ``getpixel((x, y))``; the pixel value must be comparable with
            ``G`` (the script passes a mode "L" image).
        G: Threshold a pixel must exceed to be stored as 1.
    """
    width, height = image.size[0], image.size[1]
    for row in range(height):
        for col in range(width):
            position = (col, row)
            t2val[position] = 1 if image.getpixel(position) > G else 0


def clearNoise(image, N, Z):
    """Whiten isolated pixels in the global ``t2val`` map, in place.

    Each interior pixel is compared with its eight neighbours.  When fewer
    than ``N`` of them hold the same value as the pixel itself, the pixel is
    treated as noise and set to 1 (white).  Pixels on the outermost rows and
    columns are never examined.  The map is modified while it is scanned, so
    a pixel cleared earlier in a pass affects the pixels visited after it.

    Args:
        image: Object whose ``size`` gives the (width, height) to scan.
        N: Minimum number of equal-valued neighbours a pixel needs in order
            to be kept (the original note suggests 0 < N < 8).
        Z: Number of passes over the map.

    Returns:
        None.
    """
    # Offsets of the eight surrounding pixels relative to the centre.
    neighbours = (
        (-1, -1), (-1, 0), (-1, 1),
        (0, -1), (0, 1),
        (1, -1), (1, 0), (1, 1),
    )

    for _ in range(Z):
        width, height = image.size[0], image.size[1]

        # The top-left and bottom-right corners are forced to white on
        # every pass.
        t2val[(0, 0)] = 1
        t2val[(width - 1, height - 1)] = 1

        for x in range(1, width - 1):
            for y in range(1, height - 1):
                centre = t2val[(x, y)]
                same = sum(
                    1 for dx, dy in neighbours
                    if centre == t2val[(x + dx, y + dy)]
                )
                if same < N:
                    t2val[(x, y)] = 1

def grx1(image,w,h,l):
    """Erase thin horizontal interference lines from the global ``t2val`` map.

    For every black pixel at ``(x, y)`` with ``1 <= x < w - 1`` and
    ``1 <= y < h - 5`` a window ``l`` columns wide is inspected:

    * the two-row band ``y .. y + 1`` must contain at most one non-black
      pixel, and
    * the rows directly above and below the band (``y - 1`` and ``y + 2``)
      must contain at most two non-white pixels.

    When both hold, the first three columns of the band are set to white.
    Only horizontal lines are handled; there is no vertical counterpart.

    Positions that are not present in ``t2val`` (for example columns past the
    right edge when the window starts close to it) are read as white and are
    never written, so a black pixel near the border cannot raise ``KeyError``.

    Args:
        image: Unused; kept so existing callers keep working.
        w: Width of the area to scan.
        h: Height of the area to scan.
        l: Width, in columns, of the detection window.
    """
    for x in range(1, w - 1):
        for y in range(1, h - 5):
            # 0 is black, 1 is white; only a black pixel can start a line.
            if t2val.get((x, y), 1) != 0:
                continue

            columns = range(x, x + l)
            black_in_band = sum(
                1
                for cx in columns
                for cy in (y, y + 1)
                if t2val.get((cx, cy), 1) == 0
            )
            white_around = sum(
                1
                for cx in columns
                for cy in (y - 1, y + 2)
                if t2val.get((cx, cy), 1) == 1
            )

            if black_in_band > 2 * l - 2 and white_around > 2 * l - 3:
                # The erased patch is three columns wide regardless of ``l``.
                for cx in range(x, x + 3):
                    for cy in (y, y + 1):
                        if (cx, cy) in t2val:
                            t2val[(cx, cy)] = 1

def saveImage(filename, size):
    """Write the cropped ``t2val`` map to ``filename`` as a 1-bit image.

    A 120x46 canvas is created and filled from ``t2val``, skipping a margin
    of 40 columns on the left and right and 2 rows at the top and bottom of
    an area of the given ``size``.

    Args:
        filename: Path handed to ``Image.save``.
        size: (width, height) of the area covered by ``t2val``.
    """
    left_margin = 40
    top_margin = 2

    canvas = Image.new("1", (120,46))
    pen = ImageDraw.Draw(canvas)

    for src_x in range(left_margin, size[0] - left_margin):
        for src_y in range(top_margin, size[1] - top_margin):
            target = (src_x - left_margin, src_y - top_margin)
            pen.point(target, t2val[(src_x, src_y)])

    canvas.save(filename)

# Number of OCR results that had the expected five characters and were
# submitted.  It has to exist before the loop because the loop increments it.
n = 0

# Upper bound, in seconds, for each HTTP request so that a stalled connection
# raises instead of blocking the loop forever.
REQUEST_TIMEOUT = 10

# Keep fetching a captcha, cleaning it, running OCR on it and submitting the
# result until the server no longer answers with the "captcha wrong" message.
while 1:
    urls = 'http://info.vecc.org.cn/ve/kaptcha.jpg'
    # A new session per attempt; its JSESSIONID cookie is replayed with the
    # form post below (presumably the server ties the captcha to it).
    session = requests.Session()
    html = session.get(urls, timeout=REQUEST_TIMEOUT)
    cookie = {'JSESSIONID':session.cookies.get('JSESSIONID')}
    with open('1.jpg','wb') as fw:
        fw.write(html.content)

    # The context manager releases the file handle Pillow keeps on the JPEG;
    # convert() returns an independent grayscale copy.
    with Image.open('1.jpg') as downloaded:
        image = downloaded.convert("L")

    # Binarize, remove speckles, then remove horizontal interference lines.
    twoValue(image, 120)
    clearNoise(image, 4, 10)
    grx1(image,200,50,3)

    path1 = "3.jpg"
    saveImage(path1, image.size)

    with Image.open(path1) as cleaned:
        text = pytesseract.image_to_string(cleaned).lower()
    # Drop surrounding whitespace, spaces and newlines, then every character
    # that is neither a word character nor whitespace.
    text = text.strip()
    text = text.replace(' ','')
    text = text.replace('\n','')
    text = re.sub(r'[^\w\s]','',text)

    # Only results with exactly five characters are submitted.
    if len(text)==5:
        n+=1
        # Keep a copy of the cleaned image named after the recognized text.
        saveImage(text+'.png', image.size)
        data={
            'vin': 'qe',
            'fdjh': '123456',
            'vaild': text
        }
        a=requests.post(
            'http://info.vecc.org.cn/ve/vin/index',
            data,
            verify=False,
            cookies=cookie,
            timeout=REQUEST_TIMEOUT,
        ).content
        # The marker is the \u-escaped text of the server's "captcha
        # incorrect!" message; when it is absent the attempt is treated as
        # accepted and the loop stops.
        if b'\\u9A8C\\u8BC1\\u7801\\u9519\\u8BEF\\uFF01' not in a:
            print('pass')
            break

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值