1. 先用帐号密码登录人人网,打开浏览器开发者工具(查看元素),刷新页面,在 Network 面板中找到第一个网页请求,并查看该请求的请求头中的 Cookie
2. 复制该 Cookie,粘贴到爬虫代码中,并把格式改写为 Python 字典(每个 cookie 一个键值对)
爬虫代码如下:
# -*- coding: utf-8 -*-
import scrapy
class RenrenSpider(scrapy.Spider):
    """Fetch Renren profile pages using cookies copied from a logged-in browser.

    Instead of submitting the login form, every request carries the session
    cookies stored in ``cookies``, so the site treats the spider as the
    already-logged-in user.

    NOTE(review): the original snippet lost its indentation and was cut off
    right after the ``for`` header in ``start_requests``. The loop body and
    the ``parse`` callback below are a minimal completion - confirm against
    the intended spider.
    """

    name = 'renren'
    allowed_domains = ['renren.com']
    start_urls = [
        "http://renren.com/410043129/profile",
        "http://renren.com/429732223/profile",
    ]

    # Cookies copied from the first page request of a logged-in browser
    # session and rewritten as a dict.
    # NOTE(review): these are live session credentials (and an account
    # e-mail); consider loading them from an untracked file or environment
    # instead of committing them.
    cookies = {
        "__utma": "151146938.1737961196.1534385069.1534385069.1534385069.1",
        "__utmz": "151146938.1534385069.1.1.utmcsr=browse.renren.com|utmccn=(referral)|utmcmd=referral|utmcct=/index.jsp",
        "_de": "BF09EE3A28DED52E6B65F6A4705D973F1383380866D39FF5",
        "_ga": "GA1.3.1737961196.1534385069",
        "_gid": "GA1.3.9522240.1534668034",
        "_r01_": "1",
        "anonymid": "jkvx82ss-iyi7ne",
        "BAIDU_SSP_lcr": "https://www.baidu.com/link?url=HMMNDFmJLBzkGUMyPo55yS1FpfjOUn65KQli20x3dkC&wd=&eqid=c087742b0005123a000000065b79662c",
        "depovince": "FJ",
        "first_login_flag": "1",
        "Hm_lpvt_966bff0a868cd407a416b4e3993b9dc8": "1534730971",
        "Hm_lvt_966bff0a868cd407a416b4e3993b9dc8": "1534400644,1534668032",
        "ick_login": "184ba40e-14f4-46f9-be87-b691f5e6d65f",
        "id": "327550029",
        "jebe_key": "41ef5f5a-60de-4db2-b40d-358673eb9010|c13c37f53bca9e1e7132d4b58ce00fa3|1534681796862|1|1534682728014",
        "jebecookies": "c0b8db17-99cf-4e52-b9f5-b7bd5ff1cc4f|||||",
        "JSESSIONID": "abcc8bgclekXJ5IEXvrvw",
        "ln_hurl": "http://hdn.xnimg.cn/photos/hdn521/20180807/1240/main_GRfq_0ab200000ea8195a.jpg",
        "ln_uact": "mr_mao_hacker@163.com",
        "loginfrom": "syshome",
        "p": "d4c676681b0bed76f21ec7707a4af07f9",
        "societyguester": "49e84cd2583043a3a02008e91f360fa39",
        "t": "49e84cd2583043a3a02008e91f360fa39",
        "UM_distinctid": "1654166a0a323-026bdff623a6fa8-76246752-100200-1654166a0a4206",
        "wp_fold": "0",
        "xnsid": "aae69832",
    }

    def start_requests(self):
        """Yield one request per entry in ``start_urls`` with ``cookies`` attached.

        Overrides the default start-request generation so that the session
        cookies are sent along with each profile request.
        """
        for url in self.start_urls:
            yield scrapy.Request(url, cookies=self.cookies, callback=self.parse)

    def parse(self, response):
        """Log the fetched URL and HTTP status.

        Placeholder callback: replace with real extraction of the profile
        page once the cookie-based login is confirmed to work.
        """
        self.logger.info("Fetched %s (status %s)", response.url, response.status)
settings.py 中要设置默认请求头(DEFAULT_REQUEST_HEADERS):
# Scrapy setting: default headers for outgoing requests. The User-Agent
# value is a desktop Firefox string. Accept-Language is left disabled.
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0',
    # 'Accept-Language': 'en',
}
然后就可以运行了,例如在项目目录下执行:scrapy crawl renren