前言
因实习工作需要从葵花8号卫星获取实时林火数据存入数据库,特写此文记录一下工作过程,以供有同样需求的朋友参考。
Himawari-8卫星的数据介绍和账号注册可以参考这篇文章葵花8号卫星数据简介与下载(一)——数据介绍与FTP下载。
下载部分的代码在这篇文章使用Python自动完成Himawari-8(葵花8)卫星AOD数据下载与解析(转TIFF)的基础上做了略微修改,万分感谢( _ _)ノ。
数据与程序说明
1.葵花8号WLF\bet文件夹下的数据说明:
-
葵花8号(Himawari-8)卫星每隔十分钟会上传一次csv文件,记载其对林火的监控数据
-
文件内的时间为UTC时间,跟北京时间相差8小时,简单说UTC时间 = 北京时间 - 8小时
-
监控数据记载XX:XX:XX到XX:XX+35:XX的数据,即35分钟的观测数据,数据更新时间为观测结束时间,即XX:XX+35:XX
-
检测数据文件命名H08_UTC年月日_观测开始时间(XX:XX)__L2WLFbet_FLDK.06001_06001.csv,例如H08_20201118_0000_L2WLFbet_FLDK.06001_06001.csv
2.程序说明:
-
第一次启动时,立即下载当前UTC时间,服务器目录下的数据,扫描本地文件夹,将文件全部导入数据库
-
每当到达数据更新时间点,程序会通过申请好的账号连接一次服务器,下载更新的数据
-
本地根据当前UTC时间创建文件夹保存数据
-
数据下载到本地后,将其导入进数据库中
-
等待下次数据更新时间,再次执行上述3步流程
代码
关键代码:
1.数据库导入代码
因为在使用pandas读取该csv文件出现了数据错位的问题(不知道是文件的问题还是我电脑的问题),所以这里我通过引入csv库获取csv文件中的数据,然后通过pymysql进行数据库操作。
数据获取代码:
os.chdir(dst_filePath) # 路径设置成csv文件放的地方
path = os.getcwd()
files = os.listdir(path)
for k in range(0, len(files)):
df = csv.reader(open(r'%s\%s' % (dst_filePath, files[k]), 'r'))
datas = []
# 把csv各行以list存储,datas[0]为表头,剩余为数据
for data in df:
datas.append(data)
for m in range(1, len(datas)):
# 避免报sql语句错误,给数据拼接上单引号
datas[m][27] = datas[m][27] + '\''
datas[m][28] = '\'' + datas[m][28] + '\''
datas[m][29] = '\'' + datas[m][29] + '\''
datas[m][30] = '\'' + datas[m][30] + '\''
datas[m][31] = '\'' + datas[m][31]
值得注意的是,csv文件中最后5列元素存在单独的单引号,直接使用会报sql语句错误,所以我提前给数据拼接上引号。
sql语句写的很粗糙,将就着看吧(/▽\)
def csv_2_mysql(host, user, password, db, file_name, datas):
# 打开数据库连接
conn = pymysql.connect(host=host, user=user, passwd=password)
# 获取游标
cursor = conn.cursor()
cursor.execute("CREATE DATABASE IF NOT EXISTS `%s`" % db)
cursor.execute("USE `%s`" % db)
# 删除表
# cursor.execute("drop TABLE if exists `%s`" % file_name)
# conn.commit()
# 创建表
sql = "CREATE TABLE IF NOT EXISTS `%s`(""obstime VARCHAR (255),""dettime VARCHAR (255),""sat VARCHAR (255)," \
"algo VARCHAR (255),""okm VARCHAR (255),""sampl VARCHAR (255),""line VARCHAR (255),""lon VARCHAR (255)," \
"lat VARCHAR (255),""viewzenang VARCHAR (255),""viewazang VARCHAR (255),""pixwid VARCHAR (255)," \
"pixlen VARCHAR (255),""t07 VARCHAR (255),""t14 VARCHAR (255),""t07_t14 VARCHAR (255)," \
"meant07 VARCHAR (255),""meant14 VARCHAR (255),""meandt VARCHAR (255),""sdt07 VARCHAR (255)," \
"sdt14 VARCHAR (255),""sddt VARCHAR (255),""ref3 VARCHAR (255),""ref4 VARCHAR (255)," \
"fire_idx VARCHAR (255),""fire VARCHAR (255),""test1_6 VARCHAR (255)," \
"pixel_1 VARCHAR (255),""pixel_2 VARCHAR (255),""pixel_3 VARCHAR (255)," \
"pixel_4 VARCHAR (255),""pixel_5 VARCHAR (255))" % (file_name)
cursor.execute(sql)
# 添加数据
for j in range(1, len(datas)):
sql = "INSERT INTO `%s`(obstime,dettime,sat,algo,okm,sampl,line,lon,lat,viewzenang,viewazang" \
",pixwid,pixlen,t07,t14,t07_t14,meant07,meant14,meandt,sdt07,sdt14,sddt,ref3,ref4,fire_idx,fire" \
",test1_6,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5)VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s," \
"%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)" \
% (file_name, datas[j][0],
datas[j][1], datas[j][2], datas[j][3], datas[j][4], datas[j][5], datas[j][6],
datas[j][7],
datas[j][8], datas[j][9],
datas[j][10], datas[j][11], datas[j][12], datas[j][13], datas[j][14], datas[j][15],
datas[j][16], datas[j][17],
datas[j][18], datas[j][19], datas[j][20], datas[j][21], datas[j][22], datas[j][23],
datas[j][24], datas[j][25],
datas[j][26], datas[j][27], datas[j][28], datas[j][29], datas[j][30], datas[j][31])
cursor.execute(sql)
conn.commit()
运行结果:
完整代码:
# coding=gbk
import pymysql
import csv
import os
import ftplib
from datetime import datetime
import time
def getHourStr(hourNum, minNum):
if minNum < 35:
hourNum -=1
hourStr = str(hourNum)
if hourNum < 10:
hourStr = "0" + str(hourNum)
else:
hourStr = str(hourNum)
return hourStr
# 这个函数用于将日期从整型转为FTP路径所需的字符串
def getDateStr(yearNum, monNum, dayNum):
# 四位数年份
yearStr = str(yearNum)
# 两位数月份
if monNum < 10:
monStr = "0" + str(monNum)
else:
monStr = str(monNum)
# 两位数天
if dayNum < 10:
dayStr = "0" + str(dayNum)
else:
dayStr = str(dayNum)
return yearStr, monStr, dayStr
# 这个函数用于在跨月时获取前一天的日期号
def getYesterday(year, month, day):
if day == 1:
if month == 1:
year -= 1
month = 12
day = 31
elif month == 2 or month == 4 or month == 6 or month == 8 or month == 9 or month == 11:
month -= 1
day = 31
elif month == 5 or month == 7 or month == 10 or month == 12:
month -= 1
day = 30
elif month == 3:
# 闰年
if year % 4 == 0 and year % 400 == 0:
day = 29
month -= 1
# 闰年
elif year % 4 == 0 and year % 100 != 0:
day = 29
month -= 1
else:
day = 28
month -= 1
else:
day -= 1
return year, month, day
# 获取文件后缀名
def suffix(file, *suffixName):
array = map(file.endswith, suffixName)
if True in array:
return True
else:
return False
# 删除目录下扩展名为.temp的文件
def deleteFile(fileDir):
if not os.path.exists(fileDir):
os.makedirs(fileDir)
targetDir = fileDir
for file in os.listdir(targetDir):
targetFile = os.path.join(targetDir, file)
if suffix(file, '.temp'):
os.remove(targetFile)
def csv_2_mysql_FirstTime(host, user, password, db, file_name, datas):
# 打开数据库连接
conn = pymysql.connect(host=host, user=user, passwd=password)
# 获取游标
cursor = conn.cursor()
cursor.execute("CREATE DATABASE IF NOT EXISTS `%s`" % db)
cursor.execute("USE `%s`" % db)
# 删除表
cursor.execute("drop TABLE if exists `%s`" % file_name)
conn.commit()
# 创建表
sql = "CREATE TABLE IF NOT EXISTS `%s`(""obstime VARCHAR (255),""dettime VARCHAR (255),""sat VARCHAR (255)," \
"algo VARCHAR (255),""okm VARCHAR (255),""sampl VARCHAR (255),""line VARCHAR (255),""lon VARCHAR (255)," \
"lat VARCHAR (255),""viewzenang VARCHAR (255),""viewazang VARCHAR (255),""pixwid VARCHAR (255)," \
"pixlen VARCHAR (255),""t07 VARCHAR (255),""t14 VARCHAR (255),""t07_t14 VARCHAR (255)," \
"meant07 VARCHAR (255),""meant14 VARCHAR (255),""meandt VARCHAR (255),""sdt07 VARCHAR (255)," \
"sdt14 VARCHAR (255),""sddt VARCHAR (255),""ref3 VARCHAR (255),""ref4 VARCHAR (255)," \
"fire_idx VARCHAR (255),""fire VARCHAR (255),""test1_6 VARCHAR (255)," \
"pixel_1 VARCHAR (255),""pixel_2 VARCHAR (255),""pixel_3 VARCHAR (255)," \
"pixel_4 VARCHAR (255),""pixel_5 VARCHAR (255))" % (file_name)
cursor.execute(sql)
# 添加数据
for j in range(1, len(datas)):
sql = "INSERT INTO `%s`(obstime,dettime,sat,algo,okm,sampl,line,lon,lat,viewzenang,viewazang" \
",pixwid,pixlen,t07,t14,t07_t14,meant07,meant14,meandt,sdt07,sdt14,sddt,ref3,ref4,fire_idx,fire" \
",test1_6,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5)VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s," \
"%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)" \
% (file_name, datas[j][0],
datas[j][1], datas[j][2], datas[j][3], datas[j][4], datas[j][5], datas[j][6],
datas[j][7],
datas[j][8], datas[j][9],
datas[j][10], datas[j][11], datas[j][12], datas[j][13], datas[j][14], datas[j][15],
datas[j][16], datas[j][17],
datas[j][18], datas[j][19], datas[j][20], datas[j][21], datas[j][22], datas[j][23],
datas[j][24], datas[j][25],
datas[j][26], datas[j][27], datas[j][28], datas[j][29], datas[j][30], datas[j][31])
cursor.execute(sql)
conn.commit()
def csv_2_mysql(host, user, password, db, file_name, datas):
# 打开数据库连接
conn = pymysql.connect(host=host, user=user, passwd=password)
# 获取游标
cursor = conn.cursor()
cursor.execute("CREATE DATABASE IF NOT EXISTS `%s`" % db)
cursor.execute("USE `%s`" % db)
# 删除表
# cursor.execute("drop TABLE if exists `%s`" % file_name)
# conn.commit()
# 创建表
sql = "CREATE TABLE IF NOT EXISTS `%s`(""obstime VARCHAR (255),""dettime VARCHAR (255),""sat VARCHAR (255)," \
"algo VARCHAR (255),""okm VARCHAR (255),""sampl VARCHAR (255),""line VARCHAR (255),""lon VARCHAR (255)," \
"lat VARCHAR (255),""viewzenang VARCHAR (255),""viewazang VARCHAR (255),""pixwid VARCHAR (255)," \
"pixlen VARCHAR (255),""t07 VARCHAR (255),""t14 VARCHAR (255),""t07_t14 VARCHAR (255)," \
"meant07 VARCHAR (255),""meant14 VARCHAR (255),""meandt VARCHAR (255),""sdt07 VARCHAR (255)," \
"sdt14 VARCHAR (255),""sddt VARCHAR (255),""ref3 VARCHAR (255),""ref4 VARCHAR (255)," \
"fire_idx VARCHAR (255),""fire VARCHAR (255),""test1_6 VARCHAR (255)," \
"pixel_1 VARCHAR (255),""pixel_2 VARCHAR (255),""pixel_3 VARCHAR (255)," \
"pixel_4 VARCHAR (255),""pixel_5 VARCHAR (255))" % (file_name)
cursor.execute(sql)
# 添加数据
for j in range(1, len(datas)):
sql = "INSERT INTO `%s`(obstime,dettime,sat,algo,okm,sampl,line,lon,lat,viewzenang,viewazang" \
",pixwid,pixlen,t07,t14,t07_t14,meant07,meant14,meandt,sdt07,sdt14,sddt,ref3,ref4,fire_idx,fire" \
",test1_6,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5)VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s," \
"%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)" \
% (file_name, datas[j][0],
datas[j][1], datas[j][2], datas[j][3], datas[j][4], datas[j][5], datas[j][6],
datas[j][7],
datas[j][8], datas[j][9],
datas[j][10], datas[j][11], datas[j][12], datas[j][13], datas[j][14], datas[j][15],
datas[j][16], datas[j][17],
datas[j][18], datas[j][19], datas[j][20], datas[j][21], datas[j][22], datas[j][23],
datas[j][24], datas[j][25],
datas[j][26], datas[j][27], datas[j][28], datas[j][29], datas[j][30], datas[j][31])
cursor.execute(sql)
conn.commit()
class myFTP:
ftp = ftplib.FTP()
# 连接FTP,host是IP地址,port是端口,默认21
def __init__(self, host, port=21):
self.ftp.connect(host, port)
# 登录FTP连接,user是用户名,password是密码
def Login(self, user, password):
self.ftp.login(user, password)
print(self.ftp.welcome) # 显示登录信息
# 下载单个文件,LocalFile表示本地存储路径和文件名,RemoteFile是FTP路径和文件名
def DownLoadFile(self, LocalFile, RemoteFile):
bufSize = 102400
file_handler = open(LocalFile, 'wb')
print(file_handler)
# 接收服务器上文件并写入本地文件
self.ftp.retrbinary('RETR ' + RemoteFile, file_handler.write, bufSize)
self.ftp.set_debuglevel(0)
file_handler.close()
return True
# 下载整个目录下的文件,LocalDir表示本地存储路径, emoteDir表示FTP路径
def DownLoadFileTree_FirstTime(self, LocalDir, RemoteDir, choice):
# print("remoteDir:", RemoteDir)
# 如果本地不存在该路径,则创建
if not os.path.exists(LocalDir):
os.makedirs(LocalDir)
# 获取FTP路径下的全部文件名,以列表存储
# 好像是乱序
self.ftp.cwd(RemoteDir)
RemoteNames = self.ftp.nlst()
print(RemoteNames)
RemoteNames.reverse()
# print("RemoteNames:", RemoteNames)
for file in RemoteNames:
# 防止上一次下载中断后,最后一个下载的文件未下载完整,而再开始下载时,程序会识别为已经下载完成
Local = os.path.join(LocalDir, file[0:-4] + ".temp")
LocalNew = os.path.join(LocalDir, file)
# 若已经存在,则跳过下载
# 小时数据命名格式示例:
# H08_20201102_2350_L2WLFbet_FLDK.06001_06001.csv
# 创建文件夹保存数据
if choice == 1:
if not os.path.exists(LocalNew):
print("下载文件 %s 中" % file)
self.DownLoadFile(Local, file)
os.rename(Local, LocalNew)
print("文件 %s 下载完成\n" % file)
elif os.path.exists(LocalNew):
print("文件 %s 已存在!\n" % file)
self.ftp.cwd("..")
return
def DownLoadFileTree(self, LocalDir, RemoteDir, choice, _yearStr, _monStr):
# print("remoteDir:", RemoteDir)
# 如果本地不存在该路径,则创建
if not os.path.exists(LocalDir):
os.makedirs(LocalDir)
# 获取FTP路径下的全部文件名,以列表存储
# 好像是乱序
self.ftp.cwd(RemoteDir)
RemoteNames = self.ftp.nlst()
print(RemoteNames)
RemoteNames.reverse()
# print("RemoteNames:", RemoteNames)
for file in RemoteNames:
# 防止上一次下载中断后,最后一个下载的文件未下载完整,而再开始下载时,程序会识别为已经下载完成
Local = os.path.join(LocalDir, file[0:-4] + ".temp")
LocalNew = os.path.join(LocalDir, file)
# 若已经存在,则跳过下载
# 小时数据命名格式示例:
# H08_20201102_2350_L2WLFbet_FLDK.06001_06001.csv
# 创建文件夹保存数据
if choice == 1:
if not os.path.exists(LocalNew):
print("下载文件 %s 中" % file)
self.DownLoadFile(Local, file)
os.rename(Local, LocalNew)
print("文件 %s 下载完成\n" % file)
print("开始将文件 %s 导入数据库" % file)
df = csv.reader(open(r'%s\%s' % (dst_filePath, file), 'r'))
datas = []
for data in df:
datas.append(data)
for m in range(1, len(datas)):
# 避免报sql语句错误,给数据拼接上单引号
datas[m][27] = datas[m][27] + '\''
datas[m][28] = '\'' + datas[m][28] + '\''
datas[m][29] = '\'' + datas[m][29] + '\''
datas[m][30] = '\'' + datas[m][30] + '\''
datas[m][31] = '\'' + datas[m][31]
print('正在将 %s 文件导入数据库' % file)
db_name = 'Himawari-' + _yearStr + _monStr
csv_2_mysql('localhost', "root", "123123", db_name, file, datas)
print("文件 %s 导入成功 " % file)
elif os.path.exists(LocalNew):
print("文件 %s 已存在!\n" % file)
self.ftp.cwd("..")
return
def close(self):
self.ftp.quit()
if __name__ == "__main__":
utcdt = datetime.utcnow()
_yearNum = '20' + utcdt.strftime('%y')
_monNum = utcdt.strftime('%m')
_dayNum = utcdt.strftime('%d')
_hourNum = utcdt.strftime('%H')
_minNum = utcdt.strftime('%M')
_yearStr = ""
_monStr = ""
_dayStr = ""
_hourStr = ""
print("当前utc时间是:", utcdt)
# 传入IP地址
ftp = myFTP('ftp.ptree.jaxa.jp')
# 传入用户名和密码
ftp.Login('你申请的账号', '邮件提供给你的密码')
_yearStr, _monStr, _dayStr = getDateStr(int(_yearNum), int(_monNum), int(_dayNum))
_hourStr = getHourStr(int(_hourNum), int(_minNum))
# 从目标路径ftp_filePath将文件下载至本地路径dst_filePath
dst_filePath = "你想存放的文件夹" + "/" + _yearStr + _monStr + "/" + _dayStr + "/" + _hourStr
deleteFile(dst_filePath) # 先删除存储路径中的临时文件(也就是上次未下载完整的文件)
ftp_filePath = "/pub/himawari/L2/WLF/bet" + "/" + _yearStr + _monStr + "/" + _dayStr + "/" + _hourStr
print("当前下载时间:", utcdt)
ftp.DownLoadFileTree_FirstTime(dst_filePath, ftp_filePath, 1)
# 结束
ftp.close()
print("下载完成,开始扫描文件夹导入数据 ")
os.chdir(dst_filePath) # 路径设置成csv文件放的地方
path = os.getcwd()
files = os.listdir(path)
for k in range(0, len(files)):
df = csv.reader(open(r'%s\%s' % (dst_filePath, files[k]), 'r'))
datas = []
# 把csv各行以list存储,datas[0]为表头,剩余为数据
for data in df:
datas.append(data)
for m in range(1, len(datas)):
# 避免报sql语句错误,给数据拼接上单引号
datas[m][27] = datas[m][27] + '\''
datas[m][28] = '\'' + datas[m][28] + '\''
datas[m][29] = '\'' + datas[m][29] + '\''
datas[m][30] = '\'' + datas[m][30] + '\''
datas[m][31] = '\'' + datas[m][31]
print('正在将%s文件导入数据库' % files[k])
db_name = 'Himawari-' + _yearStr + _monStr
csv_2_mysql_FirstTime('localhost', "root", "123123", db_name, files[k], datas)
print('导入成功,开始导入下一个文件')
print("当前文件已全部导入 ")
while True:
utcdt = datetime.utcnow()
_yearNum = '20' + utcdt.strftime('%y')
_monNum = utcdt.strftime('%m')
_dayNum = utcdt.strftime('%d')
_hourNum = utcdt.strftime('%H')
_minNum = utcdt.strftime('%M')
_yearStr = ""
_monStr = ""
_dayStr = ""
_hourStr = ""
print("当前utc时间是:", utcdt)
# 传入IP地址
ftp = myFTP('ftp.ptree.jaxa.jp')
# 传入用户名和密码
ftp.Login('你申请的账号', '邮件提供给你的密码')
_yearStr, _monStr, _dayStr = getDateStr(int(_yearNum), int(_monNum), int(_dayNum))
_hourStr = getHourStr(int(_hourNum), int(_minNum))
# 从目标路径ftp_filePath将文件下载至本地路径dst_filePath
dst_filePath = "你想存放的文件夹" + "/" + _yearStr + _monStr + "/" + _dayStr + "/" + _hourStr
# input("请输入要存储下载的文件的本地路径:")
deleteFile(dst_filePath) # 先删除存储路径中的临时文件(也就是上次未下载完整的文件)
ftp_filePath = "/pub/himawari/L2/WLF/bet" + "/" + _yearStr + _monStr + "/" + _dayStr + "/" + _hourStr
print("当前下载时间:",utcdt)
ftp.DownLoadFileTree(dst_filePath, ftp_filePath, 1, _yearStr, _monStr)
# 结束
ftp.close()
print("下载与导入完成,等待下次数据更新 ")
if int(_minNum) % 10 < 6:
time.sleep(60 * (6 - (int(_minNum) % 10)))
else:
time.sleep(60 * (16 - (int(_minNum) % 10)))