最近在学习爬虫,了解到有网站专门提供验证码自动识别服务,于是想试试如何借助它自动通过12306的验证码并完成登录。
工具:Selenium、Python、超级鹰
基本思路:
1.使用selenium工具访问12306
2.自动输入用户名和密码
3.获取验证码图片
4.使用超级鹰提供的API识别验证码
5.根据识别结果返回的坐标,依次点击验证码图片上的对应位置,再点击登录按钮
似乎还挺简单的,直接在超级鹰提供的例程API上改动一下,源码:
代码
#!/usr/bin/env python
# coding:utf-8
import base64
import re
import time
from hashlib import md5

import requests
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition endpoints.

    The credentials given to the constructor are turned into the form
    fields ('user', 'pass2', 'softid') that are attached to every request.
    """

    UPLOAD_URL = 'http://upload.chaojiying.net/Upload/Processing.php'
    REPORT_URL = 'http://upload.chaojiying.net/Upload/ReportError.php'

    def __init__(self, username, password, soft_id, timeout=30):
        """Store the credentials and build the fields shared by all requests.

        username: account name, sent as 'user'
        password: plain-text password; only its MD5 hex digest is kept
        soft_id: software ID, sent as 'softid'
        timeout: seconds to wait for the server on each request; without
            one, a stalled connection would block the caller indefinitely
        """
        self.username = username
        # Only the MD5 hex digest is stored and transmitted (as 'pass2').
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def _post(self, url, params, files=None):
        """POST *params* merged with the credential fields; return the decoded JSON."""
        # Credential fields are applied last, exactly as in each public method before.
        params.update(self.base_params)
        r = requests.post(url, data=params, files=files,
                          headers=self.headers, timeout=self.timeout)
        return r.json()

    def PostPic(self, im, codetype):
        """Upload a captcha image for recognition and return the JSON reply.

        im: image bytes
        codetype: captcha type code, see http://www.chaojiying.com/price.html
        """
        return self._post(self.UPLOAD_URL,
                          {'codetype': codetype},
                          files={'userfile': ('ccc.jpg', im)})

    def ReportError(self, im_id):
        """Report a wrongly recognised captcha and return the JSON reply.

        im_id: ID of the image whose result is being reported as wrong
        """
        return self._post(self.REPORT_URL, {'id': im_id})
class Login:
    """Drive a Chrome session through the account-login form at *url*.

    Typical order of calls: login() to open the page and type the
    credentials, get_pic() to fetch the captcha image bytes, click() with
    the recognition result, then get_cookies().
    """

    def __init__(self, url, username, passwd):
        """Remember the login page URL and the credentials to type in."""
        self.url = url
        self.username = username
        self.passwd = passwd
        # The WebDriver is created by login(); the other methods need it.
        self.browser = None

    def login(self):
        """Open Chrome on the login page and fill in user name and password."""
        self.browser = webdriver.Chrome()
        self.browser.get(self.url)
        # Fixed pause to give the page time to render before locating elements.
        time.sleep(2)
        # find_element(By..., ...) replaces the find_element_by_* helpers,
        # which newer Selenium releases no longer provide.
        self.browser.find_element(By.CLASS_NAME, 'login-hd-account').click()
        user = self.browser.find_element(By.ID, 'J-userName')
        word = self.browser.find_element(By.ID, 'J-password')
        user.send_keys(self.username)
        word.send_keys(self.passwd)

    def get_pic(self):
        """Return the captcha image as raw bytes, decoded from its data-URI src."""
        tag = self.browser.find_element(By.CLASS_NAME, 'imgCode')
        return self._decode_data_uri(tag.get_attribute('src'))

    @staticmethod
    def _decode_data_uri(src):
        """Decode the base64 payload of a 'data:<mime>;base64,<payload>' string.

        Any prefix up to and including 'base64,' is dropped, so the image
        MIME type does not matter; a string without that marker is decoded
        as-is.
        """
        _, marker, payload = src.partition('base64,')
        return base64.b64decode(payload if marker else src)

    @staticmethod
    def _parse_locations(pic_str):
        """Turn 'x1,y1|x2,y2' into [[x1, y1], [x2, y2]].

        Raises ValueError when pic_str is missing or empty, or when a
        coordinate is not an integer.
        """
        if not pic_str:
            raise ValueError('recognition result contains no click coordinates (empty pic_str)')
        return [[int(value) for value in point.split(',')] for point in pic_str.split('|')]

    def click(self, j):
        """Click each recognised point on the captcha, then press the login button.

        j: recognition result dict; its 'pic_str' entry holds the points as
           'x1,y1|x2,y2'

        Raises ValueError when 'pic_str' is missing or empty.
        """
        locations = self._parse_locations(j.get('pic_str'))
        # The captcha element is the same for every point, so look it up once.
        captcha = self.browser.find_element(By.CLASS_NAME, 'imgCode')
        for location in locations:
            # NOTE(review): the offsets are passed through unchanged. Selenium
            # releases differ on whether the offset origin is the element's
            # top-left corner or its centre - confirm for the installed version.
            ActionChains(self.browser).move_to_element_with_offset(
                captcha, location[0], location[1]).click().perform()
            time.sleep(1)
        self.browser.find_element(By.ID, 'J-login').click()

    def get_cookies(self):
        """Return the cookies of the current browser session."""
        return self.browser.get_cookies()
def main():
    """Log in to 12306: fill the form, have the captcha recognised, click it.

    Prints the recognition reply and, after the login click, the session
    cookies. Exits with a message when the reply carries no coordinates.
    """
    a = Login('https://kyfw.12306.cn/otn/resources/login.html', '12306账号', '密码')
    a.login()
    # Pause so the captcha image has time to load before it is read.
    time.sleep(2)
    # Third argument: the software ID generated under User Center >> Software ID.
    chaojiying = Chaojiying_Client('超级鹰账号', '密码', '软件id')
    im = a.get_pic()  # captcha image bytes taken from the page
    # Report only the size; the raw bytes are unreadable on a terminal.
    print('captcha image: {} bytes'.format(len(im)))
    # 9004 is the captcha type code (see the price list on the service's site).
    z = chaojiying.PostPic(im, 9004)
    print(z)
    # Without coordinates there is nothing to click, so stop with a clear message.
    if not z.get('pic_str'):
        raise SystemExit('captcha recognition failed: {}'.format(z))
    a.click(z)
    print(a.get_cookies())


if __name__ == '__main__':
    main()
超级鹰的计费是1元1000分,9004类型每识别一次消耗25分,价格算是挺低的了。
测试时也用过更贵的9201类型(可识别1-5个对象),结果反而识别错误,有点纳闷;改用价格只有一半的9004类型,测试了几次都能成功识别。