selenium实现无界面含验证码模拟登陆(python爬虫)
1.使用超级鹰破解验证码
import urllib.request
import http.cookiejar
import requests
from hashlib import md5
from io import BytesIO
import pandas as pd
import xlrd
from selenium.webdriver.chrome.options import Options
import time
from PIL import Image
import matplotlib.image as mp
from selenium import webdriver
class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    Every request carries the same account parameters (``user``, ``pass2``,
    ``softid``) and the same HTTP headers, both prepared once in ``__init__``.
    """

    def __init__(self, username, password, soft_id, timeout=60):
        """Store the account parameters sent with every request.

        username: account name, sent as ``user``.
        password: plain-text password (str); only its MD5 hex digest is
            kept on the instance and sent as ``pass2``.
        soft_id: software id, sent as ``softid``.
        timeout: seconds passed to ``requests`` as the request timeout, so
            a stalled server raises instead of blocking forever.
        """
        self.username = username
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image and return the service's decoded JSON reply.

        im: image bytes.
        codetype: captcha type code, see http://www.chaojiying.com/price.html
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        # The image is sent as a multipart upload in the 'userfile' field.
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised captcha and return the decoded JSON reply.

        im_id: picture ID of the captcha being reported.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()
2.获取验证码
def get_code(code_url):
    """Download the captcha image at ``code_url`` and return its recognised text.

    The image is saved to ``code.jpg`` in the current working directory,
    uploaded through ``Chaojiying_Client.PostPic`` and the ``pic_str`` field
    of the reply is returned.

    Raises ``KeyError`` if the reply has no ``pic_str`` field.
    """
    # NOTE(review): account credentials are hard-coded here; consider loading
    # them from configuration or the environment instead.
    chaojiying = Chaojiying_Client('15861425581', '123456', '898761')
    # NOTE(review): the image is fetched in a separate HTTP request from the
    # browser session; if the site generates a new captcha per request this
    # may not be the one shown on the login page -- confirm against the site.
    urllib.request.urlretrieve(url=code_url, filename='code.jpg')
    # Read through a context manager so the file handle is closed promptly.
    with open('code.jpg', 'rb') as image_file:
        im = image_file.read()
    # 1902 is the captcha type code (see http://www.chaojiying.com/price.html).
    code_dict = chaojiying.PostPic(im, 1902)
    return code_dict['pic_str']
3.登录代码实现并获取cookie
def get_cookie():
    """Log in through headless Chrome and return the session cookie string.

    Opens the login page, fills in the mobile number, password and the
    captcha answer obtained from ``get_code``, clicks the submit button and
    builds a ``'JSESSIONID=<value>;ADMINID=100150'`` string from the
    browser's cookies.

    Raises ``IndexError`` if the browser holds no cookies after the click.
    """
    url = 'xxxxxx'
    path = './chromedriver.exe'
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--no-sandbox')
    # NOTE(review): `executable_path` / `chrome_options` and the
    # `find_element_by_*` helpers are the Selenium 3 API; newer Selenium 4
    # releases removed them -- confirm the pinned Selenium version.
    driver = webdriver.Chrome(executable_path=path, chrome_options=chrome_options)
    try:
        driver.get(url)
        code_url = driver.find_element_by_xpath('//div/img').get_attribute('src')
        time.sleep(5)
        driver.find_element_by_id('mobile').send_keys('xxxxxxxx')
        driver.find_element_by_id('password').send_keys('xxxxxxxx')
        code = get_code(code_url)
        driver.find_element_by_id('answer').send_keys(code)
        driver.find_element_by_xpath('//span/button[1]').click()
        cookies = driver.get_cookies()
    finally:
        # Always shut the browser down, otherwise every call (and every
        # failure above) leaves a headless Chrome process running.
        driver.quit()

    # Prefer the cookie actually named JSESSIONID; fall back to the last
    # cookie (the original positional behaviour) when no such name exists.
    session_id = next(
        (item['value'] for item in cookies if item.get('name') == 'JSESSIONID'),
        None,
    )
    if session_id is None:
        session_id = cookies[-1]['value']
    return 'JSESSIONID={};ADMINID=100150'.format(session_id)