import base64
import time

import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By
# Log in to Douban through a real Chrome browser, solve the login captcha via
# a third-party recognition service, then reuse the browser's cookies with
# `requests` to fetch the account page and save it to cookielog.html.

# NOTE(review): the account credentials and the service APPCODE below are
# hard-coded secrets. They are kept so the script behaves as before, but they
# should be moved out of the source (environment variables or a config file).
USERNAME = '18513342381'
PASSWORD = 'guanliu4199800'
DAMA_AUTHORIZATION = 'APPCODE 55db8dae0acd4105b0d7a2fa5f00aad6'

STEP_PAUSE = 1  # seconds to pause between browser actions
REQUEST_TIMEOUT = 30  # seconds; without a timeout `requests` may block forever

# Drive the browser.
driver = webdriver.Chrome()
try:
    url = 'https://accounts.douban.com/login?alias=&redir=https%3A%2F%2Fwww.douban.com%2F&source=index_nav&error=1001'
    driver.get(url)
    time.sleep(STEP_PAUSE)
    driver.find_element(By.ID, 'email').send_keys(USERNAME)
    time.sleep(STEP_PAUSE)
    driver.find_element(By.ID, 'password').send_keys(PASSWORD)
    time.sleep(STEP_PAUSE)

    # Look for the captcha image in the rendered page. The xpath result is a
    # list, so an absent captcha yields an empty list instead of an IndexError.
    html_ele = etree.HTML(driver.page_source)
    captcha_srcs = html_ele.xpath('//img[@id="captcha_image"]/@src')
    if captcha_srcs:
        img_src = captcha_srcs[0]
        print(img_src)

        # Download the captcha image bytes.
        response = requests.get(img_src, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()

        # Base64-encode the image for the recognition service.
        b64_str = base64.b64encode(response.content)
        print(b64_str)

        # Post the image to the captcha recognition service; the JSON reply
        # carries the recognised text under the 'v_code' key.
        dmpt_url = 'http://yzmplus.market.alicloudapi.com/fzyzm'
        headers = {
            'Authorization': DAMA_AUTHORIZATION,
        }
        form = {
            'v_pic': b64_str,
            'v_type': 'cn',
        }
        response = requests.post(
            dmpt_url, form, headers=headers, timeout=REQUEST_TIMEOUT
        )
        print(response.text)
        # Fail with a clear HTTP error rather than a KeyError on 'v_code'.
        response.raise_for_status()
        v_code = response.json()['v_code']
        print(v_code)

        # Type the recognised captcha text into the form.
        driver.find_element(By.ID, 'captcha_field').send_keys(v_code)
        time.sleep(STEP_PAUSE)

    # Submit the login form.
    driver.find_element(By.CLASS_NAME, 'btn-submit').click()
    time.sleep(STEP_PAUSE)

    # Open the second entry of the global navigation bar, then follow the
    # link in the fourth row of its dropdown table.
    # NOTE(review): presumably the account menu and its settings link -
    # confirm against the live page.
    driver.find_element(
        By.XPATH, '//*[@id="db-global-nav"]/div/div[1]/ul/li[2]/a/span[1]'
    ).click()
    time.sleep(STEP_PAUSE)
    driver.find_element(
        By.XPATH,
        '//*[@id="db-global-nav"]/div/div[1]/ul/li[2]/div/table/tbody/tr[4]/td/a',
    ).click()

    # Read the session cookies from the browser.
    cookies = driver.get_cookies()
finally:
    # Always shut the browser down so Chrome/chromedriver are not leaked,
    # even when one of the steps above raises.
    driver.quit()

# Flatten the cookie dicts into a single "name=value;name=value" header value.
cookies = ';'.join(
    cookie['name'] + '=' + cookie['value'] for cookie in cookies
)

# Second request: fetch the account page with plain `requests`, authenticated
# only by the cookies taken from the browser session.
url = 'https://www.douban.com/accounts/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    'Cookie': cookies,
}
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Save whatever came back so the result can be inspected afterwards.
with open('cookielog.html', 'wb') as f:
    f.write(response.content)