学习 Python 爬虫一段时间后会发现,许多网站如果不登录,就无法获取数据,这时候就需要想其他办法了。这里小博向大家介绍的实例,就是通过 Selenium 模拟浏览器登录:截取验证码图片并上传到云打码平台进行识别,从而实现网站的登录。
import time

from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

from YDMHTTP import yanzheng  # YunDaMa (云打码) captcha-solving client code; introduced further below
# Project Leader:刘
# Log in to the site with a real Chrome session: fill in the credentials,
# screenshot the captcha image, send it to the YunDaMa helper for recognition,
# type the recognised text and submit the form.
#
# NOTE: the driver variable keeps its original (misspelled) name `diver`
# because the remainder of the script refers to it by that name.
diver = webdriver.Chrome()
# Open the login page
diver.get('http://t.tangjiu.com/Login?ReturnUrl=%2f')
# One explicit wait (up to 10 s) reused for every element lookup below.
wait = WebDriverWait(diver, 10)
# Enter the account name.  `find_element(By.ID, ...)` replaces the
# `find_element_by_id` helper, which was removed in Selenium 4.3.
username = wait.until(lambda d: d.find_element(By.ID, 'user'))
username.send_keys('dukanglaojiu')
# Enter the password
password = wait.until(lambda d: d.find_element(By.ID, 'pass'))
password.send_keys('密码')
# Locate the captcha <img> element so it can be cut out of a screenshot
captcha = wait.until(lambda d: d.find_element(By.ID, 'imgYanZhengMa'))
# Capture the whole visible page
diver.save_screenshot('page.png')
# Bounding box of the captcha element, in CSS pixels
left = captcha.location['x']
top = captcha.location['y']
right = left + captcha.size['width']
bottom = top + captcha.size['height']
print('right',right)
print('bottom',bottom)
# The screenshot is rendered in device pixels while element coordinates are
# CSS pixels; on HiDPI displays the two differ by window.devicePixelRatio, so
# scale the box before cropping (the ratio is 1 on ordinary displays, which
# leaves the box unchanged).
scale = diver.execute_script('return window.devicePixelRatio') or 1
crop_box = tuple(int(round(edge * scale)) for edge in (left, top, right, bottom))
# Crop the captcha out of the page screenshot and save it locally; the
# context manager closes the screenshot file handle afterwards.
with Image.open('page.png') as page_img:
    img = page_img.crop(crop_box)
    img.save('captcha.png')
# Upload the cropped captcha to YunDaMa for online recognition
text = yanzheng('captcha.png')
print('识别结果:',text)
# Locate the captcha input box and type the recognised text
captcha_input = wait.until(lambda d: d.find_element(By.ID, 'vccode'))
captcha_input.send_keys(text)
# Click the login button (waited for like the other elements)
wait.until(lambda d: d.find_element(By.ID, 'BtnLoadByPass')).click()
time.sleep(1)
# 点击经销商代理意向
WebDriverWait(diver, 10).until(
lambda diver: diver.find_element_by_xpath('//d