这是我自己写的通过构造请求参数爬取豆瓣的代码:可以正常登录,但是把获取到的 cookies 保存下来之后,再用它去访问页面就不行了,也不知道哪里有问题。希望老师不忙的时候帮忙看一下,谢谢!
try:
    import cookielib  # Python 2 module name
except ImportError:
    import http.cookiejar as cookielib  # Python 3 module name

import requests
from lxml import etree
from PIL import Image
# One shared session for every request, so cookies set by any response are
# sent back automatically on later requests.
session = requests.session()

# Back the session with an on-disk cookie jar so a login can be reused
# between runs of the script.
session.cookies = cookielib.LWPCookieJar(filename='cookies.txt')
try:
    # ignore_discard=True also restores session-only cookies that were saved.
    session.cookies.load(ignore_discard=True, ignore_expires=True)
except (IOError, cookielib.LoadError):
    # Missing or unreadable cookies.txt (e.g. first run): start with an
    # empty jar instead of aborting.
    print('cookie 未能加载进来')

# Login endpoint; used both to fetch the login form and to submit it.
url = "https://accounts.douban.com/login"

# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36',
}
def get_captcha(account, password):
    """Log in to Douban, asking the user to solve the captcha if one is shown.

    The login page is fetched through the shared ``session``; if it contains
    a captcha id, the captcha image is downloaded to ``captcha_value.jpg``,
    displayed, and the user is prompted for the solution. The login form is
    then posted, the response body is printed, and the session cookies are
    written to the cookie jar file.

    Args:
        account: Value submitted as ``form_email``.
        password: Value submitted as ``form_password``.
    """
    # Use the shared session (not a bare requests.get) so that cookies set by
    # the login page are part of the same session as the login POST.
    response = session.get(url, headers=headers)
    selector = etree.HTML(response.text)
    captcha_ids = selector.xpath('//div[@class="captcha_block"]/input[2]/@value')

    post_data = {
        'source': 'index_nav',
        'redir': 'https://www.douban.com/',
        'form_email': account,
        'form_password': password,
        'login': '登录',
    }

    # The captcha block is only present on some login pages; without this
    # guard the captcha fields would be referenced while undefined.
    if captcha_ids:
        captcha_id = captcha_ids[0]
        captcha_url = 'https://www.douban.com/misc/captcha?id=%s&size=s' % captcha_id
        response = session.get(captcha_url, headers=headers)
        with open('captcha_value.jpg', 'wb') as f:
            f.write(response.content)

        # Showing the image is best effort: if no viewer is available the
        # file is still on disk and can be opened manually.
        try:
            im = Image.open('captcha_value.jpg')
            im.show()
            im.close()
        except Exception as exc:
            print('无法自动打开验证码图片,请手动查看 captcha_value.jpg: %s' % exc)

        post_data['captcha-solution'] = input('请输入验证码')
        # Submit the id string itself rather than the XPath result list.
        post_data['captcha-id'] = captcha_id

    response_text = session.post(url, data=post_data, headers=headers)
    print(response_text.text)

    # ignore_discard=True is required to persist session-only cookies (those
    # without an expiry); by default save() skips them, so a later load()
    # would not restore them.
    session.cookies.save(ignore_discard=True, ignore_expires=True)
def get_index():
    """Fetch a page with the shared session and save it to ``inex_page.html``.

    The response text is written UTF-8 encoded, then ``ok`` is printed.
    """
    # NOTE(review): this requests the login URL, not the site home page. To
    # check whether the saved cookies give a logged-in view, the intended
    # target is presumably https://www.douban.com/ -- confirm before changing.
    response = session.get("https://accounts.douban.com/login", headers=headers)
    with open('inex_page.html', 'wb') as f:
        f.write(response.text.encode('utf-8'))
    print('ok')
# Script entry point: log in (prompting for a captcha if needed), then fetch
# a page with the resulting session to check the cookies.
if __name__ == "__main__":
    get_captcha('178659130xx', 'xiaolangxxx3')
    get_index()
# Screenshot attached to the original post:
# http://img.mukewang.com/szimg/5c00d2ee0001870913660768.jpg