# 本案例主要是为了获取某城市的历史天气数据，字段包含日期、最低气温、最高气温、风向、风力、天气状况、空气质量指标值、空气质量等级和空气质量说明。
#导入第三方包
import requests
import time
import random
import pandas as pd
import re
# Request headers sent with every page fetch so the request resembles one
# made by a desktop browser.
# Header names are spelled exactly as HTTP defines them: the original
# "Accept -Enconding" and "conection" keys were typos and therefore never
# acted as the Accept-Encoding / Connection headers.
headers = {
    "Accept": "*/*",
    "Accept-Encoding": "gzip,deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/63.0.3236.0 Safari/537.36",
}
# Build the list of monthly history-page URLs to fetch, one per month from
# January 2012 through December 2018.
# 58362 is presumably the site's id for the target city -- TODO confirm.
urls = []
for year in range(2012, 2019):
    for month in range(1, 13):
        if year <= 2016:
            # Up to 2016 the file name is the id followed by year and an
            # unpadded month, e.g. 58362_20121.js.
            urls.append(
                "http://tianqi.2345.com/t/wea_history/js/58362_%s%s.js"
                % (year, month)
            )
        else:
            # NOTE(review): from 2017 on the original supplied four format
            # arguments (year, month, year, month) and special-cased
            # month < 10, which implies a zero-padded "YYYYMM" directory plus
            # a zero-padded file name. The original format string only had two
            # placeholders (TypeError) and skipped months 10-12 entirely.
            # Verify this layout against the live site.
            urls.append(
                "http://tianqi.2345.com/t/wea_history/js/%s%02d/58362_%s%02d.js"
                % (year, month, year, month)
            )
# Fetch every monthly page and turn it into one DataFrame per month.

# Output column name -> key that carries the value in the page payload.
# Order here is the column order of the resulting DataFrames.
_FIELD_KEYS = {
    "ymd": "ymd",              # date
    "high": "bWendu",          # highest temperature
    "low": "yWendu",           # lowest temperature
    "tianqi": "tianqi",        # weather condition
    "fengxiang": "fengxiang",  # wind direction
    "fengli": "fengli",        # wind force
    "aqi": "aqi",              # air quality index
    "aqiInfo": "aqiInfo",      # air quality description
    "aqiLevel": "aqiLevel",    # air quality level
}
# One innermost {...} group, i.e. a single day's record.
_RECORD_RE = re.compile(r"\{[^{}]*\}")
# key:'value' or key:"value"; the closing quote must match the opening one.
_PAIR_RE = re.compile(r"""(\w+)\s*:\s*(['"])(.*?)\2""")


def _parse_weather_records(text):
    """Extract one dict per daily record from a history page payload.

    Assumes records look like ``{ymd:'2016-01-01',bWendu:'...',...}`` as
    implied by the original per-field patterns -- TODO confirm against a
    real response.

    Parsing record by record (instead of running one ``findall`` per field
    over the whole page) keeps all columns aligned: a field missing from a
    record, or a last field that is not followed by a comma, yields ``None``
    for that row rather than a shorter list, which previously made
    ``pd.DataFrame`` fail with mismatched lengths.

    Args:
        text: Response body of one monthly page.

    Returns:
        A list of dicts keyed by the output column names; values are the raw
        strings from the page, or ``None`` when a field is absent.
    """
    rows = []
    for record in _RECORD_RE.findall(text):
        pairs = {key: value for key, _quote, value in _PAIR_RE.findall(record)}
        if "ymd" not in pairs:
            # Not a daily record (e.g. an empty trailing {}).
            continue
        rows.append({column: pairs.get(key) for column, key in _FIELD_KEYS.items()})
    return rows


info = []
for url in urls:
    seconds = random.randint(3, 6)
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        reply = requests.get(url, headers=headers, timeout=30)
        reply.raise_for_status()
    except requests.RequestException as err:
        # Report and move on so one bad month does not discard the rest.
        print("skipping %s: %s" % (url, err))
    else:
        # Months without air-quality data simply get None in the aqi columns.
        info.append(
            pd.DataFrame(_parse_weather_records(reply.text), columns=list(_FIELD_KEYS))
        )
    time.sleep(seconds)  # pause a random few seconds between requests
# Merge all monthly frames into a single table. ignore_index renumbers the
# rows 0..n-1; without it every month's frame contributes its own 0-based
# labels and the merged index contains repeated values.
weather = pd.concat(info, ignore_index=True)
# Export the table to CSV without writing the index column.
weather.to_csv("weather.csv", index=False)
#注：本例运行时提示 aqiLevel 的数据个数与其他字段不一致，导致构造 DataFrame 报错；本人使用时先屏蔽了该字段。先学习大致用法，后面再纠正，也欢迎已调试通过的朋友指导。