Scrapy登录方式
- 方式一:直接携带cookie
- 方式二:找到发送 POST 请求的 URL 地址,带上登录信息(如用户名、密码等表单数据),发送请求
人人网练习(方式一:直接携带cookie登录)
创建爬虫项目
scrapy startproject renren
renrenlogin.py
import scrapy
from ..settings import DEFAULT_REQUEST_HEADERS
class RenrenloginSpider(scrapy.Spider):
    """Fetch a renren.com profile page by sending pre-obtained cookies.

    The spider issues a single request to ``start_urls[0]`` with a cookie
    dict built from a raw cookie string and saves the returned page to
    ``renren.html`` in the current working directory.
    """

    name = 'renrenlogin'
    allowed_domains = ['renren.com']
    start_urls = ['http://www.renren.com/269782969/profile']

    def start_requests(self):
        """Yield the profile request with the parsed cookies attached.

        The raw cookie string is expected in ``"k1=v1; k2=v2"`` form. Each
        segment is split on its first ``=`` only, so values that themselves
        contain ``=`` are preserved, and surrounding whitespace is removed
        so keys do not carry the space that follows ``;``.
        """
        # NOTE: `cookies字符串` is a placeholder kept from the original notes.
        # It must be bound to the raw cookie string before the spider runs;
        # otherwise this line raises NameError.
        str_cookies = '{}'.format(cookies字符串)

        cookies = {}
        for segment in str_cookies.split(';'):
            key, sep, value = segment.strip().partition('=')
            key = key.strip()
            if not sep or not key:
                # Skip empty or malformed segments (e.g. a trailing ';')
                # instead of failing on a missing '='.
                continue
            cookies[key] = value.strip()

        # Log cookie names only; the values are session credentials and
        # should not be echoed to the console.
        self.logger.debug('cookie names: %s', sorted(cookies))

        yield scrapy.Request(
            url=self.start_urls[0],
            headers=DEFAULT_REQUEST_HEADERS,
            cookies=cookies,
            callback=self.parse,
        )

    def parse(self, response):
        """Write the response page to ``renren.html`` as UTF-8 text."""
        with open('renren.html', 'w', encoding='utf-8') as f:
            # response.text decodes with the encoding Scrapy determined for
            # the response, rather than assuming the body is UTF-8.
            f.write(response.text)