# 爬虫:解决验证码问题
# 1. 利用图像处理技术
# 2. 使用网上现成的验证码处理工具,如超级鹰
# 3. 使用已登录好的 cookie
# 这次介绍如何用 selenium 配合超级鹰自动识别验证码并完成登录
from chaojiying import Chaojiying_Client #从超级鹰上下载库,导入
from selenium.webdriver import Edge
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
'''
# 用户中心>>软件ID 生成一个替换 96001
chaojiying = Chaojiying_Client('xxf09210', '15297954974xxf', '960690')
# 本地图片文件路径 来替换 a.jpg 有时WIN系统须要//
im = open('a.jpg', 'rb').read()
# 1902 验证码类型 官方网站>>价格体系 3.4+版 print 后要加()
print(chaojiying.PostPic(im, 1902))
# print chaojiying.PostPic(base64_str, 1902) #此处为传入 base64代码
'''
# Automated login to the Chaojiying site: open the login page in Edge,
# screenshot the captcha image, have the Chaojiying service recognise it,
# then fill in the login form.
#
# SECURITY NOTE(review): the account name and password below are hard-coded
# in source. They are kept unchanged so the script behaves as before, but
# they should be moved to environment variables or a config file that is
# not checked in.
LOGIN_URL = 'http://www.chaojiying.com/user/login/'
USERNAME = 'xxf09210'
PASSWORD = '15297954974xxf'
SOFT_ID = '960690'
CAPTCHA_TYPE = 1902  # captcha type code passed to PostPic
# Absolute XPath of the login <form>; every element used below lives under it.
# NOTE: absolute XPaths break as soon as the page layout changes.
FORM_XPATH = '/html/body/div[3]/div/div[3]/div[1]/form'

web = Edge()
try:
    web.get(LOGIN_URL)

    # Captcha handling: locate the captcha image and grab it as PNG bytes.
    img = web.find_element(
        by=By.XPATH, value=FORM_XPATH + '/div/img'
    ).screenshot_as_png

    chaojiying = Chaojiying_Client(USERNAME, PASSWORD, SOFT_ID)
    dict_ = chaojiying.PostPic(img, CAPTCHA_TYPE)

    # Fail loudly if recognition produced no text, instead of typing an
    # empty captcha into the form (assumes PostPic returns a dict -- the
    # original indexed it with 'pic_str').
    verify_code = dict_.get('pic_str')
    if not verify_code:
        raise RuntimeError(
            f'Chaojiying did not return a captcha result: {dict_!r}'
        )

    # Fill in user name, password and the recognised captcha text.
    web.find_element(
        by=By.XPATH, value=FORM_XPATH + '/p[1]/input'
    ).send_keys(USERNAME)
    web.find_element(
        by=By.XPATH, value=FORM_XPATH + '/p[2]/input'
    ).send_keys(PASSWORD)
    web.find_element(
        by=By.XPATH, value=FORM_XPATH + '/p[3]/input'
    ).send_keys(verify_code)
    time.sleep(4)  # pause so the filled-in form can be inspected

    # Locate the login button. The click itself is left disabled, as in the
    # original script; call a.click() here to actually submit the form.
    a = web.find_element(by=By.XPATH, value=FORM_XPATH + '/p[4]/input')
    time.sleep(3)
finally:
    # Always shut down the browser and its driver process, even when one of
    # the steps above raised.
    web.quit()