方法一 - 登录网站手动抓取Cookie
方法二 - Cookie处理为字典
方法三 - requests模块处理Cookie
1.先post到web站点
post_url='http://www.renren.com/PLogin.do'
2.再get要抓取的页面
get_url='http://www.renren.com/972496128/profile'
import requests
class RenrenLogin(object):
    """Submit a login form to renren.com and fetch a profile page.

    The form is POSTed and the profile page is then requested through the
    same requests session object, so cookies set by the first response are
    sent along with the second request.
    """

    def __init__(self, email='', password='', timeout=10):
        """Store the target URLs, the form credentials and the session.

        Args:
            email: Value submitted as the ``email`` form field. Defaults to
                an empty string, matching the previously hard-coded value.
            password: Value submitted as the ``password`` form field.
                Defaults to an empty string.
            timeout: Seconds passed as ``timeout`` to every request so a
                stalled server cannot block the script forever; ``None``
                disables the limit.
        """
        self.post_url = 'http://www.renren.com/PLogin.do'
        self.get_url = 'http://www.renren.com/972496128/profile'
        self.email = email
        self.password = password
        self.timeout = timeout
        # Instantiate the session object. Session() is the documented
        # constructor; requests.session() is only a legacy alias for it.
        self.session = requests.Session()

    def parse_html(self):
        """POST the login form, then GET the profile page and print it.

        Returns:
            None. The page text is written to standard output.
        """
        # 1. POST the form first so the session receives the site's cookies.
        data = {'email': self.email, 'password': self.password}
        self.session.post(url=self.post_url, data=data, timeout=self.timeout)
        # 2. Then GET the target page with the same session.
        html = self.session.get(url=self.get_url, timeout=self.timeout).text
        print(html)
if __name__ == '__main__':
    # Script entry point: build the login helper and run its fetch-and-print
    # routine in a single expression.
    RenrenLogin().parse_html()
总结
1.requests.get()
url
params:{}
proxies:{}
auth:() #web客户端验证
verify:True | False #https网站未做CA认证时设为False
timeout:n
cookies:{}
2.requests.post()
data:{} form表单数据
3.s=requests.session()
s.post()
s.get()