# -*- coding: utf-8 -*-
from datetime import datetime
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from ArticleSpider.items import LagouJobItem, LagouJobItemLoader
from ArticleSpider.utils.common import get_md5
from ArticleSpider.settings import SQL_DATETIME_FORMAT
class LagouSpider(CrawlSpider):
name = 'lagou'
allowed_domains = ['www.lagou.com']
start_urls = ['https://www.lagou.com/']
custom_settings = {
"COOKIES_ENABLED": False,
"DOWNLOAD_DELAY": 1,
'DEFAULT_REQUEST_HEADERS': {
'Accept': 'application/json, text/javascript, */*; q=0.01',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.8',
'Connection': 'keep-alive',
'Cookie': 'user_t
Python Scrapy 爬虫 CrawlSpider:拉勾招聘网 302 重定向问题解决方案,修改 settings 信息,在请求头中添加 Cookie
最新推荐文章于 2024-04-24 16:55:30 发布