用selenium工具获取考生之家登录图片验证码
目的:将验证码从页面中抠出来,丢给超级鹰(图片解析工具)进行处理,获取返回的结果
1.如何从页面中抠出验证码图片
1)获取整个页面的图片
2)根据验证码的两个坐标进行抠图
2.如何给超级鹰处理
超级鹰开发文档链接
开发文档中因格式有问题,需要自行进行调整,pycharm中快捷键 shift+alt+L
示例代码末尾的 print 是 Python 2 的写法,在 Python 3 中需要加上括号 ()
首先导包
from io import BytesIO
#selenium浏览器自动化测试工具
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from PIL import Image
from chaojiying import check_code
代码实现
# Start a Chrome session; main() drives this module-level browser.
browser = webdriver.Chrome()
# Explicit-wait helper bound to that browser (timeout argument: 50).
wait = WebDriverWait(browser, 50)
def main():
    """Grab the login-page captcha image and hand it to Chaojiying.

    Opens the login page, waits for the captcha element, saves a
    full-window screenshot as ``screen.png``, crops the captcha region
    out of it into ``code.png`` and passes that file name to
    ``check_code``.
    """
    url = 'http://bm.e21cn.com/log/login.aspx'
    browser.get(url)

    # 1. Wait for the captcha element BEFORE taking the screenshot, so the
    #    screenshot is not captured while the element is still missing.
    captcha = wait.until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="imgCheckCode"]'))
    )
    print(captcha)

    # 2. Capture the whole window.
    screenshot = Image.open(BytesIO(browser.get_screenshot_as_png()))
    screenshot.save('screen.png')

    # 3. Compute the crop box. The element's location/size do not account
    #    for display scaling (e.g. x2 on a Mac, x1.25 on some Windows
    #    laptops), so scale them by the browser's device pixel ratio rather
    #    than hard-coding a per-machine factor.
    ratio = browser.execute_script('return window.devicePixelRatio') or 1
    location = captcha.location  # x, y of the element
    size = captcha.size          # width, height of the element
    left = location['x'] * ratio
    top = location['y'] * ratio
    right = (location['x'] + size['width']) * ratio
    bottom = (location['y'] + size['height']) * ratio
    box = tuple(int(round(value)) for value in (left, top, right, bottom))

    # 4. Cut the captcha out of the screenshot.
    captcha_image = screenshot.crop(box)
    captcha_image.save('code.png')

    # 5. Let Chaojiying recognise it.
    check_code('code.png')


if __name__ == '__main__':
    main()
python调用超级鹰代码(chaojiying.py)
#!/usr/bin/env python
# coding:utf-8
import requests
from hashlib import md5
class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    Args:
        username: Chaojiying account name.
        password: Account password in plain text; only its MD5 hex digest
            is stored and sent (as the ``pass2`` field).
        soft_id: Software ID generated in the Chaojiying user centre.
        timeout: Seconds passed to ``requests`` as the request timeout.
            Without one, a stalled connection would block forever.
    """

    def __init__(self, username, password, soft_id, timeout=60):
        self.username = username
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        # Fields included in every request.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload an image for recognition.

        Args:
            im: Image content as bytes.
            codetype: Captcha type code, see
                http://www.chaojiying.com/price.html

        Returns:
            The JSON-decoded response body.

        Raises:
            requests.exceptions.RequestException: On network failure or
                when the request exceeds ``self.timeout``.
        """
        # base_params are applied last, matching the original update() order.
        params = {'codetype': codetype, **self.base_params}
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised image.

        Args:
            im_id: ID of the image whose result is being reported as wrong.

        Returns:
            The JSON-decoded response body.

        Raises:
            requests.exceptions.RequestException: On network failure or
                when the request exceeds ``self.timeout``.
        """
        params = {'id': im_id, **self.base_params}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()
def check_code(name):
    """Send a captcha image file to Chaojiying and print the result.

    Args:
        name: Path of the image file to recognise. It is opened as given
            (relative paths resolve against the current working
            directory) instead of being appended to a hard-coded folder
            on one developer's machine.

    Returns:
        The decoded response returned by ``Chaojiying_Client.PostPic``
        (the same value that is printed).
    """
    # Replace the placeholders with a registered Chaojiying user name and
    # password, and 96001 with a software ID generated in the user centre.
    chaojiying = Chaojiying_Client('用户名', '密码', '96001')
    # Read the image bytes and close the file promptly.
    with open(name, 'rb') as image_file:
        im = image_file.read()
    # 1902 is the captcha type code (see the price list on the official site).
    result = chaojiying.PostPic(im, 1902)
    print(result)
    return result