项目名/spiders/爬虫名.py(爬虫,携带cookie登录):
# -*- coding: utf-8 -*-
import scrapy
import re
class RenrenSpider(scrapy.Spider):
name = '爬虫名'
allowed_domains = ['renren.com']
start_urls = ['http://www.renren.com/327550029/profile'] # 需要登陆后才能正常访问的url
# start_urls中的url默认都是交给start_requests方法来生成初始请求的,可以重写该方法来添加请求头、cookie等。
def start_requests(self):
# 该cookie是登录后获取的cookie字符串。
cookies = "anonymid=jcokuqturos8ql; depovince=GW; jebecookies=f90c9e96-78d7-4f74-b1c8-b6448492995b|||||; _r01_=1; JSESSIONID=abcx4tkKLbB1-hVwvcyew; ick_login=ff436c18-ec61-4d65-8c56-a7962af397f4; _de=BF09EE3A28DED52E6B65F6A4705D973F1383380866D39FF5; p=90dea4bfc79ef80402417810c0de60989; first_login_flag=1; ln_uact=mr_mao_hacker@163.com; ln_hurl=http://hdn.xnimg.cn/photos/hdn421/20171230/1635/main_