Python获取验证码失败_Python识别验证码的思路及解决方案

该博客介绍了使用Python和Selenium库实现自动化登录的过程,包括验证码图像的处理、识别和比对。首先初始化IE浏览器,然后对页面截屏,对验证码图片进行降噪和切割,并使用平均哈希算法与模板库逐一比对,找到匹配的验证码数字,最后完成登录操作。
摘要由CSDN通过智能技术生成

#coding=utf-8

import os

import re

from selenium import webdriver

from selenium.webdriver.common.keys import Keys

import time

from selenium.webdriver.common.action_chains import ActionChains

import collections

import mongoDbBase

import numpy

import imagehash

from PIL import Image,ImageFile

import datetime

class finalNews_IE:
    """Drive Internet Explorer through a login form protected by a digit captcha.

    The captcha is recognised by template matching: the captcha element is
    cropped out of a page screenshot, binarised, cut into per-digit tiles,
    and each tile's average hash is compared with the hashes of the template
    images ``<codedir>/codeLibrary/code<digit>_<sample>.png`` (digits 0-9,
    samples 1-4).
    """

    def __init__(self, strdate, logonUrl, firstUrl, keyword_list, exportPath, codepath, codedir):
        """Start the browser, open the database helper and load the template hashes.

        Args:
            strdate: Stored as ``self.date``.
            logonUrl: URL of the login page.
            firstUrl: Stored as ``self.firstUrl``.
            keyword_list: Stored as ``self.keyword_list``.
            exportPath: Directory (relative to the cwd, or absolute) used by
                ``WriteData``.
            codepath: Accepted for interface compatibility; not used here.
            codedir: Working directory for captcha images; must contain the
                ``codeLibrary`` template folder.
        """
        self.iniDriver()
        self.db = mongoDbBase.mongoDbBase()
        self.date = strdate
        self.firstUrl = firstUrl
        self.logonUrl = logonUrl
        self.keyword_list = keyword_list
        self.exportPath = exportPath
        self.codedir = codedir

        # Maps the image hash of every template tile to the digit it shows.
        self.hash_code_dict = {}
        for digit in range(0, 10):
            for sample in range(1, 5):
                template = os.path.join(
                    codedir, "codeLibrary", "code%d_%d.png" % (digit, sample))
                template_hash = self.get_ImageHash(template)
                # A missing template yields None; storing it would make every
                # missing file collapse onto one bogus None key.
                if template_hash is None:
                    continue
                self.hash_code_dict[template_hash] = str(digit)

    def iniDriver(self):
        """Create the IE WebDriver and store it as ``self.driver``."""
        # The driver location is hard-coded (the original note mentions a
        # config file, but none is read). Raw string: the path contains
        # backslashes that must not be treated as escape sequences.
        driver_path = r"C:\Program Files\Internet Explorer\IEDriverServer.exe"
        os.environ["webdriver.ie.driver"] = driver_path
        self.driver = webdriver.Ie(driver_path)

    def WriteData(self, message, fileName):
        """Append ``message`` to ``fileName`` inside ``self.exportPath``."""
        target = os.path.join(os.getcwd(), self.exportPath, fileName)
        with open(target, 'a') as out:
            out.write(message)

    def get_ImageHash(self, imagefile):
        """Return the average hash of an image file, or None if it does not exist."""
        if not os.path.exists(imagefile):
            return None
        with open(imagefile, 'rb') as fp:
            return imagehash.average_hash(Image.open(fp))

    def clearNoise(self, imageFile, x=0, y=0):
        """Binarise an image in place to remove background noise.

        The image is converted to greyscale; pixels brighter than 135 become
        white (255) and all others black (0). The result is written back to
        ``imageFile`` as RGB.

        Args:
            imageFile: Path of the image to clean; overwritten on success.
            x, y: Unused; kept for interface compatibility.

        Returns:
            The cleaned PIL image, or None when ``imageFile`` does not exist.
        """
        if not os.path.exists(imageFile):
            return None
        with Image.open(imageFile) as source:
            gray = numpy.asarray(source.convert('L'))
        # Build an explicit uint8 array: a platform-default integer array
        # (e.g. int64) is not a pixel type Image.fromarray accepts.
        binary = numpy.where(gray > 135, 255, 0).astype(numpy.uint8)
        image = Image.fromarray(binary).convert('RGB')
        image.save(imageFile)
        return image

    def splitimage(self, imagePath, imageFile, rownum=1, colnum=4):
        """Cut an image into ``rownum`` x ``colnum`` tiles and save each tile.

        Tiles are named ``<basename>_<n>.<ext>`` with ``n`` counting from 1.

        Args:
            imagePath: Directory for the tiles; an empty string means "next
                to ``imageFile``".
            imageFile: Path of the image to cut.
            rownum: Number of rows.
            colnum: Number of columns.

        Returns:
            List of tile paths in row-major order; empty when the requested
            grid is invalid or larger than the image.
        """
        file_list = []
        with Image.open(imageFile) as img:
            w, h = img.size
            if rownum < 1 or colnum < 1 or rownum > h or colnum > w:
                return file_list

            print('Original image info: %sx%s, %s, %s' % (w, h, img.format, img.mode))
            print('开始处理图片切割, 请稍候...')

            src_dir, src_name = os.path.split(imageFile)
            dstpath = imagePath if imagePath != '' else src_dir
            name_parts = src_name.split('.')
            basename = name_parts[0]
            ext = name_parts[-1]

            rowheight = h // rownum
            colwidth = w // colnum
            num = 1
            for r in range(rownum):
                for c in range(colnum):
                    # Hand-tuned column widths: the first two columns are
                    # widened by 6 and 1 pixels. Both edges of the box scale
                    # with this width, so tiles are not an even grid. The
                    # template library is presumably cut the same way, so
                    # this must stay as is -- TODO confirm.
                    if c < 1:
                        colwid = colwidth + 6
                    elif c < 2:
                        colwid = colwidth + 1
                    else:
                        colwid = colwidth
                    # (left, upper, right, lower)
                    box = (c * colwid, r * rowheight, (c + 1) * colwid, (r + 1) * rowheight)
                    newfile = os.path.join(dstpath, basename + '_' + str(num) + '.' + ext)
                    file_list.append(newfile)
                    # Let Pillow infer the format from the file extension;
                    # a raw extension such as "jpg" is not a format name.
                    img.crop(box).save(newfile)
                    num += 1
        return file_list

    def compare_image_with_hash(self, image_hash1, image_hash2, max_dif=0):
        """Tell whether two image hashes differ by at most ``max_dif``.

        Args:
            image_hash1, image_hash2: Hashes supporting subtraction (for
                ``imagehash`` objects the difference is the number of
                differing bits). A None hash never matches.
            max_dif: Largest accepted difference; 0 demands identical hashes.

        Returns:
            True when the absolute difference is within ``max_dif``.
        """
        if image_hash1 is None or image_hash2 is None:
            return False
        return bool(abs(image_hash1 - image_hash2) <= max_dif)

    def savePicture(self, close_driver=True):
        """Screenshot the login page and recognise the captcha shown on it.

        Args:
            close_driver: Close the browser window afterwards (the historical
                behaviour). Pass False to keep using the page, e.g. to log in
                with the recognised code.

        Returns:
            The recognised digits as a string. Tiles that match no template
            contribute nothing, so the result may be shorter than the captcha.
        """
        self.driver.get(self.logonUrl)
        self.driver.maximize_window()
        time.sleep(1)

        screenshot_file = os.path.join(self.codedir, "Temp.png")
        self.driver.save_screenshot(screenshot_file)

        checkcode = self.driver.find_element_by_id("checkcode")
        location = checkcode.location  # x/y position of the captcha element
        size = checkcode.size  # width/height of the captcha element
        # Crop rectangle of the captcha within the screenshot.
        # NOTE(review): assumes element coordinates map 1:1 onto screenshot
        # pixels (no scrolling, no display scaling) -- confirm.
        rangle = (int(location['x']), int(location['y']),
                  int(location['x'] + size['width']),
                  int(location['y'] + size['height']))

        filename = os.path.join(self.codedir, "Temp_code.png")
        with Image.open(screenshot_file) as shot:
            shot.crop(rangle).save(filename)

        self.clearNoise(filename)
        file_list = self.splitimage(self.codedir, filename)

        verycode = ''
        for piece in file_list:
            piece_hash = self.get_ImageHash(piece)
            for known_hash, code in self.hash_code_dict.items():
                if self.compare_image_with_hash(piece_hash, known_hash, 0):
                    verycode += code
                    break
        print(verycode)

        if close_driver:
            self.driver.close()
        return verycode

    def longon(self, username='ctrchina', password=None):
        """Fill in the login form, including the recognised captcha, and submit it.

        Args:
            username: Account name typed into the ``username`` field.
            password: Password typed into the ``password`` field; nothing is
                typed when None.
        """
        # savePicture loads the login page itself, so the page is not loaded
        # separately here (a second load would show a different captcha).
        # The browser must stay open for the form to be filled in.
        code = self.savePicture(close_driver=False)

        accname = self.driver.find_element_by_id("username")
        accname.send_keys(username)

        accpwd = self.driver.find_element_by_id("password")
        if password is not None:
            accpwd.send_keys(password)

        checkcode = self.driver.find_element_by_name("checkcode")
        checkcode.send_keys(code)

        submit = self.driver.find_element_by_name("button")
        submit.click()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值