要点:1. 设定 UA(User-Agent)
2. 创建 csv_item_exporter.py,设定 CSV 导出格式
3. 在 settings.py 中设定好相关参数
4. 运行 begin(启动脚本)启动爬虫程序
目标网址:http://hr.tencent.com/position.php?keywords=&tid=0&start=0
spider.py(主体很简单)
import os,io,sys,re from scrapy.spider import Spider from scrapy.selector import Selector from lianxi.items import LianxiItem from scrapy import Request class LianxiSpider(Spider): name = "lianxi" allowed_domains = ["hr.tencent.com"] start_urls = [] # 起始urls列表 for i in range(0,200,10): #爬取了20页 url='https://hr.tencent.com/position.php?&start='+str(i)+'#a' start_urls.append(url) def parse(self, response): geduan=response.xpath('//table[@class="tablelist"]/tr[not(@class="h"or@class="f")]') item=LianxiItem() for zhiwei in geduan: </