准备工作
安装好 selenium 库,使用的浏览器为 Chrome。这次使用的打码平台为超级鹰,需提前准备好账户。练习地址为:https://captcha3.scrape.center/
获取打码平台API
API 文档地址为 https://www.chaojiying.com/api-14.html,示例代码如下所示:
import requests
from hashlib import md5
class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    Only the MD5 hex digest of the account password is kept on the
    instance; it is sent with every request as the ``pass2`` form field.
    """

    def __init__(self, username, password, soft_id, timeout=60):
        """Store the credentials and headers shared by every request.

        Args:
            username: Account name, sent as the ``user`` form field.
            password: Clear-text account password; hashed with MD5 here.
            soft_id: Software ID, sent as the ``softid`` form field.
            timeout: Seconds to wait on each HTTP request before
                ``requests`` gives up. ``None`` waits indefinitely.
        """
        self.username = username
        self.password = md5(password.encode('utf-8')).hexdigest()
        self.soft_id = soft_id
        # Without a timeout a stalled connection would block the caller forever.
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image for recognition.

        Args:
            im: Image content as bytes.
            codetype: Question type code; see
                http://www.chaojiying.com/price.html

        Returns:
            The decoded JSON body of the service response.
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised captcha.

        Args:
            im_id: Image ID of the question being reported.

        Returns:
            The decoded JSON body of the service response.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()
初始化
import time
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium. webdriver import ActionChains
from selenium. webdriver. common. by import By
from selenium. webdriver. support. ui import WebDriverWait
from selenium. webdriver. support import expected_conditions as EC
from chaojiying import Chaojiying_Client
# Credentials typed into the login form of the practice site.
USERNAME = 'admin'
PASSWORD = 'admin'
# Chaojiying account name, password and software ID handed to Chaojiying_Client.
# NOTE(review): the name/password values look like placeholders — replace with a real account.
CHAOJIYING_USERNAME = 'username'
CHAOJIYING_PASSWORD = 'password'
CHAOJIYING_SOFT_ID = 913617
# Question type code passed to Chaojiying_Client.PostPic as ``codetype``.
CHAOJIYING_KIND = 9004
class CrackCaptcha():
    """Drive a Chrome session against the practice login page.

    Holds the browser, an explicit wait bound to it, the login
    credentials, and a Chaojiying client used for captcha recognition.
    """

    def __init__(self):
        """Start Chrome and prepare the wait helper and recognition client."""
        self.url = 'https://captcha3.scrape.center/'
        self.browser = webdriver.Chrome()
        # Explicit waits on this browser give up after 20 seconds.
        self.wait = WebDriverWait(self.browser, 20)
        self.username = USERNAME
        self.password = PASSWORD
        self.chaojiying = Chaojiying_Client(CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)

    def __del__(self):
        """Shut the browser down when the object is garbage-collected.

        ``__init__`` may have failed before ``self.browser`` was assigned,
        so the attribute is looked up defensively. ``quit()`` is used
        rather than ``close()`` so the whole driver session ends, not just
        the current window.
        """
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # Drop the reference first so a repeated call cannot quit twice.
            self.browser = None
            browser.quit()
获取验证码
def open(self):
    """Load the login page and type the username and password into it."""
    self.browser.get(self.url)
    located = EC.presence_of_element_located
    username_input = self.wait.until(located((By.CSS_SELECTOR, 'input[type="text"]')))
    password_input = self.wait.until(located((By.CSS_SELECTOR, 'input[type="password"]')))
    username_input.send_keys(self.username)
    # Fixed pause between filling the two fields.
    time.sleep(2)
    password_input.send_keys(self.password)
def get_captcha_button(self):
    """Return the ``button[type="button"]`` element once it is present in the DOM."""
    locator = (By.CSS_SELECTOR, 'button[type="button"]')
    return self.wait.until(EC.presence_of_element_located(locator))
def get_captcha_element(self):
    """Return the captcha widget element (class ``geetest_widget``).

    The captcha image (``img.geetest_item_img``) is waited for first; its
    element is not used, only its presence.
    """
    image_locator = (By.CSS_SELECTOR, 'img.geetest_item_img')
    widget_locator = (By.CLASS_NAME, 'geetest_widget')
    self.wait.until(EC.presence_of_element_located(image_locator))
    return self.wait.until(EC.presence_of_element_located(widget_locator))
def get_captcha_position(self):
    """Return the captcha widget's box as ``[top, bottom, left, right]``.

    The values are derived from the element's ``location`` (x, y) and
    ``size`` (width, height), read after a fixed two-second pause.
    """
    widget = self.get_captcha_element()
    time.sleep(2)
    origin = widget.location
    extent = widget.size
    top = origin['y']
    bottom = top + extent['height']
    left = origin['x']
    right = left + extent['width']
    return [top, bottom, left, right]
def get_screenshot(self):
    """Capture the page as a PIL image.

    The image is also written to ``screenshot.png`` in the working
    directory before being returned.
    """
    png_bytes = self.browser.get_screenshot_as_png()
    page_image = Image.open(BytesIO(png_bytes))
    page_image.save('screenshot.png')
    return page_image
def get_captcha_image(self, name='captcha.png'):
    """Crop the captcha widget out of a page screenshot.

    Args:
        name: File name the cropped image is saved under.

    Returns:
        The cropped image object.
    """
    box = self.get_captcha_position()
    top, bottom, left, right = box
    print('验证码位置', top, bottom, left, right)
    page_image = self.get_screenshot()
    # crop() expects (left, upper, right, lower), unlike the box order above.
    cropped = page_image.crop((left, top, right, bottom))
    cropped.save(name)
    return cropped
解析结果并提交
def get_points(self, captcha_result):
    """Parse the coordinates out of a recognition result.

    Args:
        captcha_result: Mapping returned by the recognition service. Its
            ``pic_str`` entry holds ``|``-separated groups of
            ``,``-separated integers, e.g. ``"108,133|227,143"``.

    Returns:
        A list with one list of ints per group, e.g.
        ``[[108, 133], [227, 143]]``.

    Raises:
        ValueError: If ``pic_str`` is missing or empty, or if a group
            contains something that is not an integer.
    """
    pic_str = captcha_result.get('pic_str')
    if not pic_str:
        # Fail with the full response instead of an opaque error further down.
        raise ValueError('captcha result has no pic_str coordinates: %r' % (captcha_result,))
    return [[int(number) for number in group.split(',')] for group in pic_str.split('|')]
def touch_click_words(self, locations):
    """Click each point in ``locations`` on the captcha widget.

    Args:
        locations: Iterable of sequences whose first two items are the
            x and y offsets passed to ``move_to_element_with_offset``.

    The widget is looked up again for every click, and each click is
    followed by a one-second pause.
    """
    # NOTE(review): the origin that move_to_element_with_offset measures from
    # (element top-left vs. centre) may differ between Selenium releases —
    # confirm against the installed version.
    for point in locations:
        chain = ActionChains(self.browser)
        target = self.get_captcha_element()
        chain.move_to_element_with_offset(target, point[0], point[1])
        chain.click()
        chain.perform()
        time.sleep(1)
def get_verifi_button(self):
    """Wait for the confirm control (``.geetest_commit_tip``) and click it."""
    locator = (By.CSS_SELECTOR, '.geetest_commit_tip')
    self.wait.until(EC.presence_of_element_located(locator)).click()
运行
def crack_login(self):
    """Run the whole login flow and print whether it succeeded.

    Steps: fill in the form, open the captcha, send the cropped captcha
    image to the recognition service, click the returned points, confirm,
    then wait for an ``h2`` containing the success text.

    A timeout while waiting for the success text is reported by printing
    the failure message rather than by letting the exception escape.
    """
    # Local import: this method is the only place the exception is needed.
    from selenium.common.exceptions import TimeoutException

    self.open()
    time.sleep(1)
    self.get_captcha_button().click()

    # Serialise the cropped captcha to PNG bytes for upload.
    image = self.get_captcha_image()
    buffer = BytesIO()
    image.save(buffer, format='PNG')
    result = self.chaojiying.PostPic(buffer.getvalue(), CHAOJIYING_KIND)
    print(result)

    locations = self.get_points(result)
    self.touch_click_words(locations)
    time.sleep(3)
    self.get_verifi_button()
    time.sleep(5)

    # wait.until() either returns a truthy value or raises TimeoutException,
    # so failure has to be detected through the exception.
    try:
        self.wait.until(EC.text_to_be_present_in_element((By.TAG_NAME, 'h2'), '登录成功'))
    except TimeoutException:
        print('登录失败')
    else:
        print('登录成功')
if __name__ == '__main__':
    # Script entry point: build the cracker and run one login attempt.
    cracker = CrackCaptcha()
    cracker.crack_login()
结语
这样就完成了借助打码平台解决验证码的问题,借助打码平台几乎任意的验证码都可以识别,是一种通用的方法。