一、功能介绍
基于 Python 爬虫,下载全国温室数据系统的气象数据:http://data.sheshiyuanyi.com/WeatherData/
二、网页分析
三、代码
例:下载兴海(站点编号52943)气象站2005年的日最高气温数据。
# coding=gbk
import requests
import os
# Getfile类的代码引用自https://blog.51cto.com/eddy72/2106091?cid=732015
class Getfile:
    """Download the resource at a URL into a local file, optionally resuming.

    Attributes:
        url: Address of the file to download.
        header_flag: When True and the target file already exists, a
            ``Range`` header is sent so the download resumes from the
            current file size instead of starting over.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
    """

    def __init__(self, url, timeout=30):
        self.url = url
        self.header_flag = False  # when True, send a Range header to resume
        self.timeout = timeout

    def downfile(self, filename):
        """Stream ``self.url`` into *filename*.

        Args:
            filename: Path of the local file to write (or append to when
                resuming).

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status
                (other than 416 on a resume attempt), so that an error page
                is never saved as if it were the requested file.
        """
        self.headers = {}
        self.mode = 'wb'
        resuming = self.header_flag and os.path.exists(filename)
        if resuming:
            self.headers = {'Range': 'bytes=%d-' % os.path.getsize(filename)}
            self.mode = 'ab'

        # The context manager releases the streamed connection even if
        # writing fails part-way through.
        with requests.get(self.url, stream=True, headers=self.headers,
                          timeout=self.timeout) as response:
            self.r = response  # kept as an attribute, as the original did

            if resuming and response.status_code == 416:
                # Requested range starts at/after the end of the remote file:
                # treated as "nothing left to fetch"; the local file is left
                # untouched rather than having an error body appended to it.
                return

            response.raise_for_status()

            if resuming and response.status_code != 206:
                # The server ignored the Range header and is sending the whole
                # file; appending would duplicate data, so start over.
                self.mode = 'wb'

            with open(filename, self.mode) as out:
                # Write to disk while downloading, 1 KiB at a time.
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        out.write(chunk)
def single_download(paras):
    """Request one weather-data file from the server and download it.

    A GET request carrying *paras* as the query string is first sent to
    ``downloadWeatherData.php``; the file
    ``datafile/<staNum>_<subIndex>_<year>_<month>.xlsx`` is then downloaded
    into the current working directory under that same file name.
    (Presumably the PHP request makes the server generate the file --
    confirm against the site.)

    Args:
        paras: Mapping of request parameters. Must contain the keys
            ``staNum``, ``subIndex``, ``year`` and ``month``, which are used
            to build the file name; the whole mapping is sent as the query
            string.

    Raises:
        KeyError: If one of the file-name keys is missing from *paras*.
        requests.HTTPError: If the ``downloadWeatherData.php`` request is
            answered with a 4xx/5xx status.
    """
    # Endpoint that receives the request parameters.
    url2 = "http://data.sheshiyuanyi.com/WeatherData/php/downloadWeatherData.php"
    # Request headers.
    header = {
        "User-Agent": "Mozilla / 5.0(Windows NT 10.0;WOW64) AppleWebKit / 537.36(KHTML, likeGecko) Chrome / 78.0.3904.108Safari / 537.36"
    }
    # Build the file name first so a missing key fails before any network
    # traffic happens.
    filename = "{0}_{1}_{2}_{3}.xlsx".format(paras["staNum"], paras["subIndex"], paras["year"], paras["month"])
    down_url = "http://data.sheshiyuanyi.com/WeatherData/datafile/{0}".format(filename)

    # Do not go on to the download step if this request was rejected, and do
    # not wait forever on an unresponsive server.
    response = requests.get(url2, headers=header, params=paras, timeout=30)
    response.raise_for_status()

    temp = Getfile(url=down_url)
    temp.downfile(filename)
if __name__ == "__main__":
    # Request parameters for the example: station 52943, daily maximum air
    # temperature, year 2005 (month=0 presumably selects the whole year --
    # confirm against the site).
    in_paras = dict(
        action="one",
        staNum="52943",
        index="air_temperature",
        subIndex="max_tem",
        year=2005,
        month=0,
    )
    single_download(paras=in_paras)
    # Report the name of the file that was just requested.
    print("Completed: {0}_{1}_{2}_{3}.xlsx".format(
        *[in_paras[key] for key in ("staNum", "subIndex", "year", "month")]
    ))
不同气象要素的请求参数index和subIndex如下:
# Maps each weather element's Chinese display name to the pair of request
# parameters (index, subIndex) used to ask for it. The entries are grouped by
# their shared ``index`` value and flattened into one dict, in listed order.
dict_index = {
    label: (index, sub_index)
    for index, sub_items in (
        ("air_temperature", (
            ("平均气温", "avg_tem"),
            ("日最高气温", "max_tem"),
            ("日最低气温", "min_tem"),
        )),
        ("precipitation", (
            ("降水", "pre_20_20"),
        )),
        ("wind", (
            ("平均风速", "avg_win"),
            ("最大风速", "max_win"),
        )),
        ("sunshine_duration", (
            ("日照时数", "ssd"),
        )),
        ("ground_surface_temperature", (
            ("平均地表气温", "avg_gst"),
            ("日最高地表气温", "max_gst"),
            ("日最低地表气温", "min_gst"),
        )),
        ("relative_humidity", (
            ("平均相对湿度", "avg_rhu"),
            ("最小相对湿度", "min_rhu"),
        )),
        ("air_pressure", (
            ("平均本站气压", "avg_prs"),
            ("日最高本站气压", "max_prs"),
            ("日最低本站气压", "min_prs"),
        )),
        ("evaporation", (
            ("小型蒸发量", "min_evp"),
            ("大型蒸发量", "max_evp"),
        )),
        ("solar_radiation", (
            ("日累计辐射", "sr"),
        )),
    )
    for label, sub_index in sub_items
}
以上请求参数参考自网页审查元素中的 weatherIndex 定义(JavaScript):
// Container for the weather-index definitions. It is created as an empty
// array; the definitions that follow are attached to it as named properties
// (weatherIndex.tem, weatherIndex.pre, ...), not as array elements.
var weatherIndex = [
];
// Air temperature: unit degrees Celsius, precision 0.1, request value
// "air_temperature". Sub-indices: average (avg_tem), daily maximum (max_tem)
// and daily minimum (min_tem).
weatherIndex.tem = {
name: "气温",
unit:"℃",
precision:0.1,
value:"air_temperature",
subIndex: [{name:"平均气温",value:"avg_tem"},{name:"日最高气温",value:"max_tem"},{name:"日最低气温",value:"min_tem"}]
};
// Accumulated temperature: unit d·℃, precision 0.1, request value
// "accumulated_temperature". No sub-indices are defined.
weatherIndex.acc = {
name: "积温",
unit:"d·℃",
precision:0.1,
value:"accumulated_temperature",
subIndex: []
};
// Precipitation: unit mm, precision 0.1, request value "precipitation".
// Only the 20:00-to-20:00 sub-index (pre_20_20) is active; the 20-to-8 and
// 8-to-20 sub-indices are commented out in the array below.
weatherIndex.pre = {
name: "降水",
unit:"mm",
precision:0.1,
value:"precipitation",
subIndex: [{name:"20至20时降水量",value:"pre_20_20"}/*,{name:"20至8时降水量",value:"pre_20_8"},{name:"8至20时降水量",value:"pre_8_20"}*/]
};
// Wind speed: unit m/s, precision 0.1, request value "wind".
// Sub-indices: average (avg_win) and maximum (max_win); the extreme wind
// speed sub-index (ext_win) is commented out in the array below.
weatherIndex.win = {
name: "风速",
unit:"m/s",
precision:0.1,
value:"wind",
subIndex: [{name:"平均风速",value:"avg_win"},{name:"最大风速",value:"max_win"}/*,{name:"极大风速",value:"ext_win"}*/]
};
// Sunshine duration: unit h, precision 0.1, request value
// "sunshine_duration". Single sub-index: ssd.
weatherIndex.ssd = {
name: "日照时数",
unit:"h",
precision:0.1,
value:"sunshine_duration",
subIndex: [{name:"日照时数",value:"ssd"}]
};
// 0 cm ground temperature: unit degrees Celsius, precision 0.1, request value
// "ground_surface_temperature". Sub-indices: average (avg_gst), daily
// maximum (max_gst) and daily minimum (min_gst).
weatherIndex.gst = {
name: "0cm地温",
unit:"℃",
precision:0.1,
value:"ground_surface_temperature",
subIndex: [{name:"平均地表气温",value:"avg_gst"},{name:"日最高地表气温",value:"max_gst"},{name:"日最低地表气温",value:"min_gst"}]
};
// Frost-free period: unit days, precision 1. No sub-indices are defined.
// NOTE(review): the request value is "ground_surface_temperature", the same
// as weatherIndex.gst -- confirm whether this is intentional on the site.
weatherIndex.ffs = {
name: "无霜期",
unit:"天",
precision:1,
value:"ground_surface_temperature",
subIndex: []
};
// Relative humidity: unit %, precision 1, request value "relative_humidity".
// Sub-indices: average (avg_rhu) and minimum (min_rhu).
weatherIndex.rhu = {
name: "相对湿度",
unit:"%",
precision:1,
value:"relative_humidity",
subIndex: [{name:"平均相对湿度",value:"avg_rhu"},{name:"最小相对湿度",value:"min_rhu"}]
};
// Station air pressure: unit hPa, precision 0.1, request value
// "air_pressure". Sub-indices: average (avg_prs), daily maximum (max_prs)
// and daily minimum (min_prs).
weatherIndex.prs = {
name: "本站气压",
unit:"hPa",
precision:0.1,
value:"air_pressure",
subIndex: [{name:"平均本站气压",value:"avg_prs"},{name:"日最高本站气压",value:"max_prs"},{name:"日最低本站气压",value:"min_prs"}]
};
// Evaporation: unit mm, precision 0.1, request value "evaporation".
// Sub-indices: small-pan evaporation (min_evp) and large-pan evaporation
// (max_evp).
weatherIndex.evp = {
name: "蒸发",
unit:"mm",
precision:0.1,
value:"evaporation",
subIndex: [{name:"小型蒸发量",value:"min_evp"},{name:"大型蒸发量",value:"max_evp"}]
};
// Solar radiation: unit MJ/m^2, precision 0.01, request value
// "solar_radiation". Single sub-index: daily cumulative radiation (sr).
weatherIndex.sr = {
name: "太阳辐射",
unit:"MJ/m^2",
precision:0.01,
value:"solar_radiation",
subIndex: [{name:"日累计辐射",value:"sr"}]
};