使用Cookie模拟已登录状态
import scrapy
class LoginByCookie(scrapy.Spider):
    """Simulated login, approach one: attach existing cookies to the request.

    The spider targets the imooc site (www.imooc.com).
    """

    name = 'login_by_cookie'
    allowed_domains = ['www.imooc.com']
    start_urls = []

    def start_requests(self):
        """Override ``start_requests()`` to yield one cookie-carrying request.

        ``start_urls`` is empty, so this method is the only source of
        requests defined in this class. The response is routed to
        ``parse_page``.
        """
        # NOTE(review): these look like session cookies captured from a
        # logged-in browser; presumably they must be refreshed once the
        # session ends -- confirm before relying on them.
        session_cookies = {
            'imooc_uuid': 'c13c8cb7-442a-430e-a2c1-78d91c347b67',
            'imooc_isnew_ct': '1515076153',
            'imooc_isnew': '2',
            'loginstate': '1',
            'apsid': 'NhMDY2ZDFmODhmYWQ5ZmQ2NDI3ZDg0OTU0NWM3NTQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMjM0NjAyNQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA4MDkwMjE4MjNAcXEuY29tAAAAAAAAAAAAAAAAAAAAAGI3ZmJjOTUxMTU2YjBlOTVlOTIxYzM1ZDk0OTVmOGNhW3FQWltxUFo%3DYm',
            'PHPSESSID': 'vd48nsltdovbbifsn48pu15763',
            'IMCDNS': '0',
            'Hm_lvt_f0cfcccd7b1393990c78efdeebff3968': '1515076155,1515221269,1515746784,1516641134',
            'Hm_lpvt_f0cfcccd7b1393990c78efdeebff3968': '1516641134',
            'cvde': '5a661b6d0246d-3',
        }
        profile_url = 'https://www.imooc.com/u/2346025'
        yield scrapy.FormRequest(
            url=profile_url,
            cookies=session_cookies,
            callback=self.parse_page,
        )

    def parse_page(self, response):
        """Print the UTF-8 decoded page body, then the page's title text."""
        html = response.body.decode('utf-8')
        print(html)
        page_title = response.xpath('//title/text()').extract_first()
        print(page_title)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
import scrapy
class LoginByFormData(scrapy.Spider):
    """Simulated login, approach two: submit the site's login form."""

    name = 'login_by_formdata'
    start_urls = ['http://www.example.com/users/login.php']

    def parse(self, response):
        """Submit the login form found in ``response``.

        Args:
            response: The downloaded login page (the URL in ``start_urls``).

        Returns:
            A ``scrapy.FormRequest`` built from the page's form with the
            username and password filled in; its response is handled by
            ``after_login``.
        """
        # The original note describes this as equivalent to building the
        # request by hand:
        #
        #     scrapy.FormRequest(
        #         url=login_url,
        #         formdata={'username': 'john', 'password': 'secret'},
        #         callback=self.after_login,
        #     )
        #
        # NOTE(review): per the Scrapy docs, ``from_response`` additionally
        # pre-populates the other fields of the page's form, so the two are
        # only interchangeable when the form has no such extra fields.
        return scrapy.FormRequest.from_response(
            response,
            formdata={'username': 'john', 'password': 'secret'},
            callback=self.after_login,
        )

    def after_login(self, response):
        """Check that the login succeeded before continuing the crawl.

        Args:
            response: The response to the submitted login form.

        Returns:
            None. Logs an error and stops early when the failure marker is
            present in the response body.
        """
        # ``response.body`` is bytes, so the marker must be a bytes literal;
        # testing a ``str`` against bytes raises TypeError on Python 3.
        if b"authentication failed" in response.body:
            self.logger.error("Login failed")
            return
        # continue scraping with authenticated session...