爬虫 Day 7:豆瓣网图形验证码登录(使用 Selenium)

import base64
import os
import time

import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By

# Launch a Chrome session and open the Douban login page.
driver = webdriver.Chrome()
url = 'https://accounts.douban.com/login?alias=&redir=https%3A%2F%2Fwww.douban.com%2F&source=index_nav&error=1001'
driver.get(url)

# Credentials may be supplied through the environment; the literals are the
# script's original values and are kept only as backward-compatible fallbacks.
# NOTE(review): secrets committed to source should be rotated and removed.
douban_username = os.environ.get('DOUBAN_USERNAME', '18513342381')
douban_password = os.environ.get('DOUBAN_PASSWORD', 'guanliu4199800')

# Fill in the login form. find_element(By.ID, ...) is used instead of the
# find_element_by_id shortcut because Selenium 4 removed the shortcut helpers;
# the By-based call is available in both Selenium 3 and 4.
# The one-second pauses between steps are kept from the original script.
time.sleep(1)
driver.find_element(By.ID, 'email').send_keys(douban_username)
time.sleep(1)
driver.find_element(By.ID, 'password').send_keys(douban_password)
time.sleep(1)

# Solve the image captcha, but only if the login page actually shows one.
# Indexing the XPath result unconditionally raised IndexError whenever the
# page had no <img id="captcha_image"> element.
html_ele = etree.HTML(driver.page_source)
captcha_srcs = html_ele.xpath('//img[@id="captcha_image"]/@src')
if captcha_srcs:
    img_src = captcha_srcs[0]
    print(img_src)

    # Download the captcha image. A timeout keeps the script from hanging
    # forever on a stalled connection, and raise_for_status() stops us from
    # sending an HTTP error page to the solver as if it were an image.
    response = requests.get(img_src, timeout=10)
    response.raise_for_status()
    # The body is binary image data, so report its size instead of printing
    # it decoded as text.
    print(len(response.content))

    # Base64-encode the image bytes for the solver's form field.
    b64_str = base64.b64encode(response.content)
    print(b64_str)

    # Captcha-solving service endpoint. The APPCODE may be overridden through
    # the environment; the literal is the original value kept as a fallback.
    # NOTE(review): this key is committed to source and should be rotated.
    dmpt_url = 'http://yzmplus.market.alicloudapi.com/fzyzm'
    appcode = os.environ.get('CAPTCHA_APPCODE', '55db8dae0acd4105b0d7a2fa5f00aad6')
    headers = {
        'Authorization': 'APPCODE ' + appcode,
    }
    form = {
        'v_pic': b64_str,
        'v_type': 'cn',
    }
    response = requests.post(dmpt_url, form, headers=headers, timeout=30)
    # Print the raw reply before validating it so a failure is still visible.
    print(response.text)
    response.raise_for_status()
    # The recognised text is read from the 'v_code' key of the JSON reply.
    v_code = response.json()['v_code']
    print(v_code)

    # Type the recognised code into the captcha input field.
    driver.find_element(By.ID, 'captcha_field').send_keys(v_code)
    time.sleep(1)
else:
    print('No captcha image found on the login page; continuing without one.')
# Submit the login form. find_element(By.…) replaces the
# find_element_by_class_name / find_element_by_xpath shortcuts, which
# Selenium 4 removed; the By-based form works on Selenium 3 and 4 alike.
driver.find_element(By.CLASS_NAME, 'btn-submit').click()
time.sleep(1)

# Click the second entry of the global navigation bar.
# NOTE(review): presumably this opens the account drop-down — confirm against
# the live page, since the XPath is purely positional.
driver.find_element(By.XPATH, '//*[@id="db-global-nav"]/div/div[1]/ul/li[2]/a/span[1]').click()
time.sleep(1)

# Follow the link in the fourth row of that entry's drop-down table.
driver.find_element(By.XPATH, '//*[@id="db-global-nav"]/div/div[1]/ul/li[2]/div/table/tbody/tr[4]/td/a').click()

# Capture the cookies of the logged-in browser session.
browser_cookies = driver.get_cookies()

# The browser is not used after this point. Quit it so the Chrome window and
# its driver process are not left running once the script finishes.
driver.quit()

# Flatten the cookies into a single Cookie header value: "name=value" pairs
# joined by ';'.
cookies = ';'.join(
    cookie['name'] + '=' + cookie['value'] for cookie in browser_cookies
)

# Request the accounts page again without the browser, authenticating only
# with the cookies collected from the Selenium session.
url = 'https://www.douban.com/accounts/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    'Cookie': cookies,
}
# A timeout prevents the script from blocking indefinitely if the server
# never answers.
response = requests.get(url, headers=headers, timeout=10)

# Save the raw response body for inspection. It is written whatever the
# status code is, so a failed login can still be examined.
with open('cookielog.html', 'wb') as f:
    f.write(response.content)



 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值