前言
前几天想获取一些水质数据,但国家数据网站的数据都只是实时数据,没有历史数据。于是在阿里云上找了一个历史数据的接口。
实现了利用接口获取斜塘口2021年全年数据,并存储在mysql数据中。
工具:
需要提前准备一些工具
阿里云接口地址:国控地表水断面历史数据
在这个网址下有个调试,主要是后面有些自定义的参数对应的一些数据转换,在自己的代码中需要替换
数据库格式:
代码
框架如下图所示:
注意
处理数据:数据是以json格式返回,处理之后是python的字典格式。
将数据导入数据库:数据本身有缺失,为了方便之后的处理,将数据缺失时刻的时间存入数据库,方便之后进行数据分析时的数据处理。
代码部分
import urllib.request as urllib2
import ssl
import json
import pymysql
#连接数据库
def connect_mysql1(sta_tp_v,sta_do_v,chlorophyll,sta_ph_l,sta_an_v,sta_an_l,status_label,water_temp,sta_time,sta_tn_v,
sta_ph_v,turbidity,sta_pp_l,conductivity,water_l,sta_pp_v,algal_density,sta_do_l):
# 打开数据库连接
conn = pymysql.connect(host="你自己的数据库地址", port=3306, user="root", passwd="数据库密码", db="test")
print('连接成功')
# 使用cursor()方法获取操作游标
cursor = conn.cursor()
sqlQuery=" INSERT INTO demo (sta_tp_v,sta_do_v,chlorophyll,sta_ph_l,sta_an_v,sta_an_l,status_label,water_temp," \
"sta_time,sta_tn_v,sta_ph_v,turbidity,sta_pp_l,conductivity,water_l,sta_pp_v,algal_density,sta_do_l) VALUE (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) "
value = (sta_tp_v,sta_do_v,chlorophyll,sta_ph_l,sta_an_v,sta_an_l,status_label,water_temp,sta_time,sta_tn_v,sta_ph_v,
turbidity,sta_pp_l,conductivity,water_l,sta_pp_v,algal_density,sta_do_l)
try:
cursor.execute(sqlQuery, value)
conn.commit()
print(sta_time+'数据插入成功!')
except pymysql.Error as e:
print("数据插入失败:" + e)
conn.rollback()
conn.close()
def connect_mysql2(time):
sta_time = time+':00:00'
# 打开数据库连接
conn = pymysql.connect(host="你自己的数据库地址", port=3306, user="root", passwd="你自己的数据库密码", db="test")
print('连接成功')
# 使用cursor()方法获取操作游标
cursor = conn.cursor()
sqlQuery = " INSERT INTO demo (sta_time) VALUE (%s) "
value = (sta_time)
try:
cursor.execute(sqlQuery, value)
conn.commit()
print(sta_time+'数据插入成功!')
except pymysql.Error as e:
print("数据插入失败:" + e)
conn.rollback()
conn.close()
#数据处理,如果有数据则将数据插入数据库,否则将时间插入数据库
def data(content,time):
a = json.loads(content)
if (a['data']):
sta_tp_v = str(a['data'][0]['sta_tp_v'])
sta_do_v = str(a['data'][0]['sta_do_v'])
chlorophyll = str(a['data'][0]['chlorophyll'])
sta_ph_l = str(a['data'][0]['sta_ph_l'])
sta_an_v = str(a['data'][0]['sta_an_v'])
sta_an_l = str(a['data'][0]['sta_an_l'])
water_temp = str(a['data'][0]['water_temp'])
status_label = str(a['data'][0]['status_label'])
sta_time = str(a['data'][0]['sta_time'])
sta_tn_v = str(a['data'][0]['sta_tn_v'])
sta_ph_v = str(a['data'][0]['sta_ph_v'])
turbidity = str(a['data'][0]['turbidity'])
sta_pp_l = str(a['data'][0]['sta_pp_l'])
conductivity = str(a['data'][0]['conductivity'])
water_l = str(a['data'][0]['water_l'])
sta_pp_v = str(a['data'][0]['sta_pp_v'])
algal_density = str(a['data'][0]['algal_density'])
sta_do_l = str(a['data'][0]['sta_do_l'])
print(sta_tp_v, sta_do_v, chlorophyll, sta_ph_l, sta_an_v, sta_an_l, status_label, water_temp, sta_time, sta_tn_v,
sta_ph_v, turbidity, sta_pp_l, conductivity, water_l, sta_pp_v, algal_density, sta_do_l)
connect_mysql1(sta_tp_v, sta_do_v, chlorophyll, sta_ph_l, sta_an_v, sta_an_l, status_label, water_temp, sta_time,
sta_tn_v, sta_ph_v, turbidity, sta_pp_l, conductivity, water_l, sta_pp_v, algal_density, sta_do_l)
else:
connect_mysql2(time)
#接口调用模块
def Main(time):
querys = 'city=' + city + '&province=%E4%B8%8A%E6%B5%B7%E5%B8%82&sta_time=' + time + '%3A00%3A00&staname=%E6%96%9C%E5%A1%98%E5%8F%A3&version=v1'
# 按照前面工具里面的参数进行替换
url = host + path + '?' + querys
request = urllib2.Request(url)
request.add_header('Authorization', 'APPCODE ' + appcode)
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
response = urllib2.urlopen(request, context=ctx)
content = response.read()
if (content):
print(content)
data(content,time)
appcode = '此处为你购买接口的appcode'
host = 'https://nawater.market.alicloudapi.com'
path = '/api/surface_water/data'
method = 'GET'
city='%E6%9D%BE%E6%B1%9F%E5%8C%BA'
#时间循环模块,自动调用2021年一整年的
for i in range(1,13):
if i in [1,3,5,7,8,10,12]:
for j in range(1,32):
for k in range(6):
time = '2021-'+str(i)+'-'+str(j)+'+'+str(4*k)
Main(time)
elif i ==2:
for j in range(1, 29):
for k in range(6):
time = '2021-' + str(i) + '-' + str(j) + '+' + str(4 * k)
Main(time)
else:
for j in range(1, 31):
for k in range(6):
time = '2021-' + str(i) + '-' + str(j) + '+' + str(4 * k)
Main(time)