创建mysql数据库,从微波辐射计数据文件_lv2中提取,廓线数据,积分数据,云底高度及雨状态,并上传数据到数据库,
lv2文件格式说明
csv文件件,逗豆分隔,前20行为数据说明,数据从22行开始,每7行为一组,数据组第一行,有雨状态,第二行有温度廓线,第三行,水汽廓线,第四行液态水廓线,第五行相对湿度廓线,第六行,水汽积分,液态水积分,云底高度。每行前面有时间,时间因文件生成不一样,有两种区别,程序里分别处理。数据文件件如图所示,廓线资料有58层。
1. 数据库创建
#Author: Wu dongqiao
import pymysql as msq
# 打开数据库连接
db = msq.connect("localhost", "root", "12345678", "wbfs", charset='utf8' )
# 使用cursor()方法获取操作游标
cursor = db.cursor()
# 如果数据表已经方法删除表。
cursor.execute("DROP TABLE IF EXISTS prof")
cursor.execute("DROP TABLE IF EXISTS intdata")
cursor.execute("DROP TABLE IF EXISTS cloud_base")
cursor.execute("DROP TABLE IF EXISTS rain")
# 创建廓线数据表
sql = """CREATE TABLE prof(
id int AUTO_INCREMENT,
Data_time DATETIME,
Tprof varchar(500),
Vprof varchar(500),
Lprof varchar(500),
RHprof varchar(500),
PRIMARY KEY(id,Data_time)
)"""
cursor.execute(sql)
#创建水汽积分数据表
sql='''CREATE TABLE intdata(
id int AUTO_INCREMENT,
Data_time DATETIME,
int_Vapor varchar(10),
int_Liquid varchar(10),
PRIMARY KEY(id,Data_time)
)'''
cursor.execute(sql)
#创建云底高度数据表
sql="""CREATE TABLE cloud_base(
id int AUTO_INCREMENT,
Data_time DATETIME,
Cloud_base FLOAT(3,2),
PRIMARY KEY(id,Data_time)
)"""
cursor.execute(sql)
#创建雨状态数据表
sql="""CREATE TABLE rain(
id int AUTO_INCREMENT,
Data_time DATETIME,
Rain int(1),
PRIMARY KEY(id,Data_time)
)"""
cursor.execute(sql)
# 关闭数据库连接
cursor.close()
db.close()
2. 数据解析
#Author: Wu dongqiao
#读取微波辐射数据并反回mysql格式的形式值
import pandas as pd
import numpy as np
def readwbfsdata(wbfs_file_path):
#读取廓线数据
skip_rows=21 #设置始读行
use_cols=tuple(i for i in range(0,62)) #设置使用的列数
cols_name=list(range(0,62)) #设置使用的列名(数据列)
#使用pandas_read_csv读取数据
data=pd.read_csv(wbfs_file_path,skiprows=skip_rows,dtype=str,delimiter=',',usecols=use_cols,names=cols_name)#
#读取时间块
Data_time=data[::7][1]
time=[]
#根据不同格式的时间字符转成YYYY-MM-DD HH:MM:SS
if len(Data_time.values[0].split('/')[0])==4:
for i in range(len(Data_time)):
# time.append(Data_time.values[i][-5:])
time.append(('20'+Data_time.values[i][8:10]+'-'+Data_time.values[i][2:7]).replace('/','-')+Data_time.values[i][-6:]+':00')
else:
for i in range(len(Data_time)):
# time.append(Data_time.values[i][-8:-3])
time.append(('20'+Data_time.values[i][6:8]+'-'+Data_time.values[i][:5]+Data_time.values[i][-9:]).replace('/','-'))
return data,time
def readprodata(wbfs_file_path):
data,time=readwbfsdata(wbfs_file_path)
# 读取温度廓线数据Temperature (K)
Tprof_data = data.iloc[1:, 4:][::7]
# 读取水汽廓线数据Vapor
Vprof_data = data.iloc[2:, 4:][::7]
# 读取液态水廓线数据Liquid
Lprof_data = data.iloc[3:, 4:][::7]
# 读取相对温度廓线数据Relative Humidity
RHprof_data = data.iloc[4:, 4:][::7]
Tdata = []
Vdata = []
RHdata = []
Ldata = []
#将廓线数据用,连起来,变成一条数据
for i in range(len(time)):
Tdata.append(','.join(Tprof_data.values[i]))
Vdata.append(','.join(Vprof_data.values[i]))
RHdata.append(','.join(RHprof_data.values[i]))
Ldata.append(','.join(Lprof_data.values[i]))
return time,Tdata,Vdata,RHdata,Ldata
def readintdata(wbfs_file_path):
data, time = readwbfsdata(wbfs_file_path)
# 读取水汽积分
int_Vapor = data.iloc[5:, 3][::7].values
# 读取液态水积分
int_Liquid = data.iloc[5:, 4][::7].values
return time,int_Vapor,int_Liquid
def readCloudbase(wbfs_file_path):
data, time = readwbfsdata(wbfs_file_path)
# 读取云底高度
Cloud_base = data.iloc[5:, 5][::7].astype(float).values
return time,Cloud_base
def readRain(wbfs_file_path):
data, time = readwbfsdata(wbfs_file_path)
# 读取雨状态
Rain = data[::7][7].astype(int).values
return time,Rain
def getData(wbfs_file_path):
#调用函数获得返回值
prof=readprodata(wbfs_file_path)
intdata = readintdata(wbfs_file_path)
Cloud_base=readCloudbase(wbfs_file_path)
Rain=readRain(wbfs_file_path)
#转置成一条SQL数据
tempf = ()
tempi = ()
tempc = ()
tempr = ()
for i in range(len(prof[0])):
#tuple元组连接
tempf=tempf+(tuple(np.array(prof).T[i]),)
tempi=tempi+(tuple(np.array(intdata).T[i]),)
tempc=tempc+(tuple(np.array(Cloud_base).T[i]),)
tempr=tempr+(tuple(np.array(Rain).T[i]),)
#廓线数据
prof=tempf
#水汽积分数据
intdata=tempi
#云底高度数据
Cloud_base =tempc
#雨状态数据
Rain =tempr
return prof,intdata,Cloud_base,Rain
3 . 上传数据
#Author: Wu dongqiao
# coding:utf-8
import pymysql
import wbfsread as wb
import tkinter.filedialog
#设置微波辐射数据路径选择要读取的_lv2.CSV文件
#wbfs_file_path='/home/python/Desktop/data/wbfs/2018-07-02_00-04-11_lv2.csv'
wbfs_file_path=tkinter.filedialog.askopenfilename()
#若没有选则退出
if wbfs_file_path==():
exit()
# 打开数据库连接
db = pymysql.connect("localhost", "root", "12345678", "wbfs",charset='utf8')
# 使用cursor()方法获取操作游标
cursor = db.cursor()
# SQL 插入语句
#str_to_date(\'%s\','%%Y-%%m-%%d %%H:%%i:%%s')
sql1 = "INSERT INTO prof(Data_time,Tprof,Vprof,Lprof,RHprof) VALUES (%s,%s,%s,%s,%s)"
sql2 = "INSERT INTO intdata(Data_time,int_Vapor,int_Liquid) VALUES (%s,%s,%s)"
sql3 = "INSERT INTO cloud_base(Data_time,Cloud_base) VALUES (%s,%s)"
sql4 = "INSERT INTO rain(Data_time,Rain) VALUES (%s,%s)"
# tuple格式,廓线、积分、云底高度、雨状态数据
P,I,C,R= wb.getData(wbfs_file_path)
try:
# 执行sql语句
cursor.executemany(sql1, P)
cursor.executemany(sql2, I)
cursor.executemany(sql3, C)
cursor.executemany(sql4, R)
# 提交到数据库执行
db.commit()
except Exception as e:
# 如果发生错误则回滚
print(e)
db.rollback()
# # 关闭游标
cursor.close()
# 关闭数据库连接
db.close()