# settings.py -- Scrapy project settings
# Scrapy settings for the TouPic project.
#
# Each setting sits on its own line: when the file is collapsed onto a single
# line, everything after the first '#' is a comment and no setting other than
# BOT_NAME is ever assigned.

# Project (bot) name.
# NOTE(review): spelled 'TouPIc' here while the package is 'TouPic' -- confirm
# which spelling is intended; the value is deliberately left unchanged.
BOT_NAME = 'TouPIc'

# Modules in which Scrapy looks for spiders.
# (Was misspelled SPIDER_MOULES, which Scrapy does not recognise.)
SPIDER_MODULES = ['TouPic.spiders']

# Module in which newly generated spiders are created.
NEWSPIDER_MODULE = 'TouPic.spiders'

# User-Agent string identifying the crawler (browser identification).
USER_AGENT = ''

# Whether to obey robots.txt rules.
# (Was misspelled ROBOTSTST_OBEY, which Scrapy does not recognise.)
ROBOTSTXT_OBEY = False

# Maximum number of concurrent requests.
CONCURRENT_REQUESTS = 32

# Download delay between requests.
DOWNLOAD_DELAY = 3

# Concurrent requests per domain / per IP.
# NOTE(review): per the Scrapy settings reference, a non-zero
# CONCURRENT_REQUESTS_PER_IP takes precedence over
# CONCURRENT_REQUESTS_PER_DOMAIN -- confirm both are meant to be set.
CONCURRENT_REQUESTS_PER_DOMAIN = 16
CONCURRENT_REQUESTS_PER_IP = 16

# Whether cookies are enabled (enabled by default in Scrapy).
COOKIES_ENABLED = False

# Whether the telnet console is enabled.
# (Was misspelled TELENTCONSOLE_ENABLED, which Scrapy does not recognise.)
TELNETCONSOLE_ENABLED = False

# Default request headers.
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
}

# Spider middlewares.
SPIDER_MIDDLEWARES = {
    'TouPic.middlewares.ToupicSpiderMiddleware': 543,
}

# Downloader middlewares.
DOWNLOADER_MIDDLEWARES = {
    'TouPic.middlewares.ToupicDownloaderMiddleware': 543,
}

# Extensions; a value of None disables the listed extension.
EXTENSIONS = {
    'scrapy.extensions.telnet.TelnetConsole': None,
}

# Item pipelines (data storage).
ITEM_PIPELINES = {
    'TouPic.pipelines.ToupicPipeline': 300,
}

# Automatic throttling.
AUTOTHROTTLE_ENABLED = True

# HTTP caching.
HTTPCACHE_ENABLED = True