爬取 https://www.aqistudy.cn/historydata 网站的空气质量报告,并将爬取到的数据导出为 CSV 文件
scrapy startproject air_quality 创建scrapy项目
scrapy genspider api_history_spider https://www.aqistudy.cn/historydata/index.php 生成 spider
文件目录如图所示
settings.py
# Enable the project's item pipeline. The integer is the pipeline's order
# value: Scrapy runs enabled pipelines from the lowest value to the highest.
ITEM_PIPELINES = {
    'air_quality.pipelines.AirQualityPipeline': 300,
}
items.py
import scrapy


class AirQualityItem(scrapy.Item):
    """Container for one scraped air-quality record.

    Each attribute is a plain ``scrapy.Field`` with no metadata; the
    trailing comments give the meaning of each column.
    """

    city_name = scrapy.Field()      # city name
    record_date = scrapy.Field()    # measurement date
    aqi_val = scrapy.Field()        # AQI
    range_val = scrapy.Field()      # range
    quality_level = scrapy.Field()  # air-quality level
    pm2_5_val = scrapy.Field()      # PM2.5
    pm10_val = scrapy.Field()       # PM10
    so2_val = scrapy.Field()        # SO2
    co_val = scrapy.Field()         # CO
    no2_val = scrapy.Field()        # NO2
    o3_val = scrapy.Field()         # O3
    rank = scrapy.Field()           # rank
pipelines.py
1 from scrapy.exporters import CsvItemExporter 2 3 class AirQualityPipeline(object): 4 5 def open_spider(se