通过CrawlSpider爬取网易社会招聘信息
1.创建工程
scrapy startproject 项目名称
2.创建crawlspider爬虫
scrapy genspider -t crawl 爬虫名 允许爬取的域名（例如 163.com）
3.爬虫代码如下
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
class WangyishezhaoSpider(CrawlSpider):
    """CrawlSpider for NetEase (163.com) social-recruitment job listings.

    Starts from the listing page, follows pagination links, and sends each
    job-detail link to :meth:`parse_item`.
    """

    name = 'wangyishezhao'
    allowed_domains = ['163.com']
    start_urls = ['https://hr.163.com/position/list.do?postType=01']

    rules = (
        # Pagination links: follow every page of the listing, no callback needed.
        Rule(LinkExtractor(restrict_xpaths='//div[@class="m-page"]/a'), follow=True),
        # First-column link in each table row is the job-detail page.
        Rule(LinkExtractor(restrict_xpaths='//tbody/tr/td[1]/a'), callback='parse_item', follow=False),
    )

    def parse_item(self, response):
        """Extract fields from one job-detail page and yield them as a dict.

        NOTE(review): the original snippet was truncated after the bare name
        ``data_dict`` (a NameError as written). The extraction below is a
        minimal, working completion — confirm the XPaths against the live
        detail page and extend the dict with the fields actually needed.
        """
        data_dict = {
            'url': response.url,
            # presumably the job title is the page's first <h2>; verify on the site
            'title': response.xpath('//h2/text()').get(),
        }
        yield data_dict