一.基本配置
1.项目名称(对应 BOT_NAME 设置项)
2.爬虫应用路径
# Modules in which Scrapy looks for spiders.
SPIDER_MODULES = ['Amazon.spiders']
# Module in which newly generated spiders are created (e.g. by `scrapy genspider`).
NEWSPIDER_MODULE = 'Amazon.spiders'
3.客户端User-Agent请求头
# User-Agent header value sent by the crawler.
# NOTE(review): "www.yourdomain.com" looks like the project-template
# placeholder; replace it with a real contact URL before deploying.
USER_AGENT = 'Amazon (+http://www.yourdomain.com)'
4.是否遵循爬虫协议
# Whether to obey robots.txt rules; False turns the check off.
ROBOTSTXT_OBEY = False
5.是否启用cookie(通过cookiejar操作cookie)
# Whether cookie handling is enabled for requests and responses.
COOKIES_ENABLED = True
6.Telnet控制台:用于查看当前爬虫的运行状态并对其进行操作
# Telnet console settings. The console is switched off here; the host and
# port below only take effect if TELNETCONSOLE_ENABLED is set to True.
TELNETCONSOLE_ENABLED = False
# Interface the console would listen on (loopback only).
TELNETCONSOLE_HOST = '127.0.0.1'
# Port list for the console; a single entry pins it to that one port.
TELNETCONSOLE_PORT = [6023]
7.全局设置使用的请求头
# Headers applied to every request by default.
DEFAULT_REQUEST_HEADERS = {
    # Prefer HTML/XHTML, then XML, then anything else.
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    # Ask servers for English content.
    'Accept-Language': 'en',
}