目标网站:
http://www.nitrafficindex.com/ 。本例仅获取宁波市的数据;如需获取其他城市的数据,请自行修改代码中的 areaCode 等请求参数。
代码部分
class SiweiTrafficSpider(crawler.BaseCrawlSpider):
    """Spider that fetches paginated road traffic-index rows for Ningbo
    (areaCode 330200) from www.nitrafficindex.com.

    NOTE(review): base class ``crawler.BaseCrawlSpider`` is project-local;
    its contract (e.g. how ``allowed_domains`` is initialized) is not
    visible here — confirm against the project source.
    """
    # Throttle: wait 3 seconds between successive downloads to be polite
    # to the target server.
    custom_settings = {'DOWNLOAD_DELAY': 3}
    name = "siwei_traffic"
def __init__(self, *args, **kwargs):
    """Initialize the base crawler, then register the target domain.

    Delegates all arguments to ``crawler.BaseCrawlSpider`` unchanged and
    extends ``allowed_domains`` in place with the site being scraped.
    """
    super(SiweiTrafficSpider, self).__init__(*args, **kwargs)
    # In-place extension of the (presumably list-typed) allowed_domains
    # inherited from the base class — TODO confirm it is always a list.
    self.allowed_domains.append("nitrafficindex.com")
def start_requests(self):
    """Yield one POST request per result page of the road-index endpoint.

    Pages 1 through 4 are requested, 10 rows each, for Ningbo
    (areaCode 330200) across road levels 1-4. Responses are routed to
    :meth:`parse`.
    """
    endpoint = 'http://www.nitrafficindex.com/traffic/getRoadIndex.do'
    # Iterate over the result pages; each page returns a "rows" array.
    for page in range(1, 5):
        payload = {
            'areaCode': '330200',
            'roadLevel': '1, 2, 3, 4',
            'page': str(page),
            'rows': '10',
        }
        yield scrapy.FormRequest(endpoint, formdata=payload, callback=self.parse)
def parse(self, response):
try:
for i in range(0,10):
jsobj = json.loads(response.text)
# 获取roads的id,根据id进入下一层从而获取每一行的数据
id1 = jsobj['rows'][i]["id"]
# 获取道路等级
roadgrade = jsobj['rows'][i]["roadGrade"]
road_id = jsobj['ro