python2.7代码实现:
爬虫主文件:
# -*- coding: utf-8 -*-
import sys
from scrapy.http import Request
# Switch the interpreter-wide default string encoding to UTF-8 (the original
# author marks this step as optional).
# NOTE(review): reload(sys) presumably exists so that setdefaultencoding is
# reachable again on Python 2; both calls are Python 2 only -- confirm before
# porting this file to Python 3.
reload(sys)
sys.setdefaultencoding("utf-8")
import scrapy
# Spider class; Scrapy spiders must inherit from scrapy.Spider.
class DmozSpider(scrapy.Spider):
    """Demo spider that requests the Baidu home page and prints its cookies.

    The single start request is issued from ``start_requests`` and its
    response is handled by ``parse``, which prints every ``Set-Cookie``
    header the server returned.
    """

    name = "itcast"  # spider name

    # Browser-like User-Agent sent with the start request.
    header = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/65.0.3325.181 Safari/537.36'
        ),
    }

    def start_requests(self):
        """Build the initial requests (used instead of ``start_urls``).

        Returns:
            A one-element list holding the request for the Baidu home page.
            The request carries the class-level ``header`` dict and is
            tagged with ``meta={'cookiejar': 1}``.
        """
        return [
            Request(
                'https://www.baidu.com',
                # The User-Agent defined on the class was previously never
                # attached to the request; send it explicitly.
                headers=self.header,
                meta={'cookiejar': 1},
                callback=self.parse,
            )
        ]

    def parse(self, response):
        """Print the cookies set by the server on ``response``.

        Args:
            response: the response delivered for the start request.

        Returns:
            None. A callback must not return a bare integer, so nothing is
            returned when there are no items or follow-up requests.
        """
        # Cookies issued by the server arrive in ``Set-Cookie`` response
        # headers; ``Cookie`` is a request header and is absent here.
        cookies = response.headers.getlist('Set-Cookie')

        # Single-argument print(...) emits the same text on Python 2.7 as the
        # print statement and stays valid syntax on Python 3.
        print(1111111111111111111111)
        print(cookies)
        print(1111111111111111111111)