Python实现FTP自动下载文件实例（1）-使用python自动下载葵花8卫星（Himawari-8）L1数据产品（2km全圆盘）

几分出发

已于 2023-03-26 21:31:21 修改

阅读量1.4k

点赞数 9

分类专栏：气象文章标签： python 开发语言 Powered by 金山文档

于 2023-02-17 22:20:05 首次发布

本文链接：https://blog.csdn.net/weixin_44815511/article/details/129093661

版权

气象专栏收录该内容

1 篇文章 0 订阅

订阅专栏

按：由于工作中需要使用葵花8卫星的数据，通常情况下使用FTP类软件，直接连接葵花8数据服务器（ftp://ftp.ptree.jaxa.jp），手动选择要下载的数据即可。但是我要根据时间来筛选我所需要的数据，手动选择非常麻烦，因此尝试使用python进行自动下载。

一、数据简要介绍

在/jma/netcdf/路径下，存放netcdf格式文件（文件名以.nc结尾），该路径下按照年月/日又分为两级文件夹，如../202109/15/表示2021-09-15那一天的文件。文件夹内一共有三类文件，按照文件名结尾字符分别如下：

文件名结尾	区域	分辨率
.02701_02601.nc	日本（24N-50N, 123E-150E）	1km
.02401_02401.nc	全圆盘（60S-60N, 80E-160W）	5km
.06001_06001.nc	全圆盘（60S-60N, 80E-160W）	2km

关于文件名各字符具体含义，可参考/jma/README_HimawariNetCDF_en.txt，或点击下载葵花8（Himawari-8）卫星netCDF数据产品命名规则。

二、代码部分

1.代码介绍

我要下载的是2km全圆盘的nc文件，即以.06001_06001.nc为结尾的文件，同时要根据我输入的时间段进行下载，只下载覆盖该时间段的文件。我要传入多个时间段，这些时间段已经预先保存在excel表格里（后来我转成了csv文件，因此代码里用的是.csv文件）。注意输入的时间段必须是以下格式的字符串：

YYYY-MM-DD HH:MM -- YYYY-MM-DD HH:MM 如："2021-09-15 15:00 -- 2021-09-15 16:43"

图，保存多个时间段的csv文件内容

2.代码

import os
import ftplib
import datetime
import pandas as pd


def downloadH8File(timeRange):
    """Return the remote relative paths of the files covering a time range.

    Both ends of the range are rounded down to a 10-minute boundary and one
    path is produced for every 10-minute step between them (inclusive).

    Args:
        timeRange: String of the form
            ``"YYYY-MM-DD HH:MM -- YYYY-MM-DD HH:MM"``.

    Returns:
        A list of strings shaped like
        ``"YYYYMM/DD/NC_H08_YYYYMMDD_HHMM_R21_FLDK.06001_06001.nc"``. The
        first 10 characters are always the ``YYYYMM/DD/`` directory prefix.
        The list is empty when the end precedes the start.

    Raises:
        ValueError: If either timestamp does not match ``%Y-%m-%d %H:%M``.
        IndexError: If the ``' -- '`` separator is missing.
    """
    sList = timeRange.split(' -- ')
    timeStart = datetime.datetime.strptime(sList[0].strip(), '%Y-%m-%d %H:%M')
    timeEnd = datetime.datetime.strptime(sList[1].strip(), '%Y-%m-%d %H:%M')
    # Round both ends down to the previous 10-minute boundary.
    timeStart -= datetime.timedelta(minutes=timeStart.minute % 10)
    timeEnd -= datetime.timedelta(minutes=timeEnd.minute % 10)

    # Change this suffix to select one of the other file types.
    fileSuffix = '_R21_FLDK.06001_06001.nc'
    step = datetime.timedelta(minutes=10)

    filenameList = []
    current = timeStart
    while current <= timeEnd:
        # The directory is derived from each timestamp (not from the start
        # time only) so ranges crossing midnight or a month boundary point
        # at the right remote folder.
        filenameList.append(
            current.strftime('%Y%m/%d/')
            + 'NC_H08_'
            + current.strftime('%Y%m%d_%H%M')
            + fileSuffix
        )
        current += step
    return filenameList


def suffix(file, *suffixName):
    """Tell whether a file name ends with any of the given suffixes.

    Args:
        file: File name to inspect.
        *suffixName: Candidate endings, e.g. ``'.temp'``.

    Returns:
        True if ``file`` ends with at least one candidate, otherwise False
        (including when no candidate is given).
    """
    return any(file.endswith(ending) for ending in suffixName)

def deleteFile(fileDir):
    """Delete every regular file ending in ``.temp`` directly inside a directory.

    Sub-directories are not descended into. A missing directory is treated
    as "nothing to clean" instead of raising, so the function is safe to
    call before the download directory has been created.

    Args:
        fileDir: Path of the directory to clean.
    """
    if not os.path.isdir(fileDir):
        return
    for file in os.listdir(fileDir):
        targetFile = os.path.join(fileDir, file)
        # Only plain files are removed; os.remove would fail on a directory.
        if file.endswith('.temp') and os.path.isfile(targetFile):
            os.remove(targetFile)

class myFTP:
    """Small wrapper around ``ftplib.FTP`` that logs each step to a text file.

    Every message passed to ``debug_print`` is printed and appended to the
    log file; messages passed to ``deal_error`` are only appended to the log.
    """

    def __init__(self, host, port=21,
                 log_path=r"D:\mywork\satellite\download_H8_log.txt"):
        """Connect to the server and open the log file in append mode.

        Args:
            host: Server host name or IP address.
            port: Server port, 21 by default.
            log_path: Location of the log file. Its parent directory must
                already exist.
        """
        self.host = host
        # The FTP object is created per instance; as a class attribute it
        # would be one connection shared by every myFTP object.
        self.ftp = ftplib.FTP()
        self.ftp.connect(host, port)
        self.log_file = open(log_path, "a")

    def Login(self, user, password):
        """Log in and switch the connection to binary transfer mode.

        Failures are written to the log and otherwise ignored (best effort).

        Args:
            user: Account name.
            password: Account password.
        """
        try:
            self.debug_print('开始尝试连接到 %s' % self.host)
            self.ftp.login(user, password)
            self.debug_print('成功登录到 %s' % self.host)
            self.debug_print(self.ftp.welcome)
            print(self.ftp.welcome)  # show the server greeting
            self.ftp.voidcmd('TYPE I')  # binary transfer mode
        except Exception as err:
            self.deal_error("FTP 连接或登录失败 ，错误描述为：%s" % err)

    def DownLoadFile(self, LocalFile, RemoteFile):
        """Download one remote file into a local file.

        Errors are logged, not raised; callers verify the result with
        ``is_same_size``.

        Args:
            LocalFile: Local destination path (overwritten if present).
            RemoteFile: Path of the file on the server.
        """
        try:
            bufSize = 102400
            # The context manager closes the handle even when the transfer
            # fails part-way through.
            with open(LocalFile, 'wb') as file_handler:
                print(file_handler)
                self.debug_print('>>>>>>>>>>>>下载文件 %s ... ...' % LocalFile)
                self.ftp.retrbinary('RETR ' + RemoteFile, file_handler.write, bufSize)
            self.ftp.set_debuglevel(0)
        except Exception as err1:
            self.debug_print('下载文件出错，出现异常：%s ' % err1)

    def is_same_size(self, local_file, remote_file):
        """Tell whether the local and remote files have the same size.

        Args:
            local_file: Path of the local file.
            remote_file: Path of the file on the server.

        Returns:
            True only when both sizes could be determined and are equal.
            An unknown size on either side yields False, so a failed lookup
            is never mistaken for a completed download.
        """
        try:
            remote_file_size = self.ftp.size(remote_file)
        except Exception as err2:
            remote_file_size = None
            self.deal_error(err2)

        try:
            local_file_size = os.path.getsize(local_file)
        except OSError:
            local_file_size = None

        # Unknown sizes are reported as -1 in the log line.
        self.debug_print('local_file_size:%d  , remote_file_size:%d' % (
            -1 if local_file_size is None else local_file_size,
            -1 if remote_file_size is None else remote_file_size,
        ))
        if local_file_size is None or remote_file_size is None:
            return False
        return remote_file_size == local_file_size

    def debug_print(self, s):
        """Print a message and append it to the log file."""
        print(s)
        self.write_log(s)

    def deal_error(self, e):
        """Append an error to the log file without printing or raising.

        Args:
            e: Exception or message describing the failure.
        """
        log_str = '发生错误: %s' % e
        self.write_log(log_str)

    def write_log(self, log_str):
        """Append one timestamped entry to the log file.

        Args:
            log_str: Text of the entry.
        """
        datetime_now = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
        format_log_str = "%s ---> %s \n " % (datetime_now, log_str)
        self.log_file.write(format_log_str)
        # Flush so the entry survives if the script is interrupted.
        self.log_file.flush()

    def close(self):
        """Quit the FTP session and close the log file."""
        self.debug_print("close()---> FTP退出")
        try:
            self.ftp.quit()
        finally:
            # Close the log even when the server connection is already gone.
            self.log_file.close()


def _download_verified(ftp, remote_path, temp_path, final_path):
    """Download to a temporary name and rename it once the size is confirmed.

    Downloading under a ``.temp`` name keeps an interrupted transfer from
    being mistaken for a finished file on the next run.

    Returns:
        True if the file was downloaded and renamed, False if the size
        check failed (the temporary file is left in place).
    """
    ftp.DownLoadFile(temp_path, remote_path)
    if ftp.is_same_size(temp_path, remote_path):
        os.rename(temp_path, final_path)
        return True
    return False


if __name__ == "__main__":
    # Connect to the data server.
    ftp = myFTP('ftp.ptree.jaxa.jp')
    try:
        # Replace the placeholders with an account registered at
        # https://www.eorc.jaxa.jp/ptree/
        ftp.Login('用户名', '密码')

        # Local directory that receives the files, e.g. E:/AOD_Download
        dst_filePath = input("请输入要存储下载的文件的本地路径：")
        LocalDir = dst_filePath
        # Create the directory first: cleaning a directory that does not
        # exist yet would fail on the very first run.
        if not os.path.exists(LocalDir):
            os.makedirs(LocalDir)
        # Remove leftovers of downloads that were interrupted last time.
        deleteFile(LocalDir)

        # One time range per row, read from the 'CALIOP Lidar' column.
        df = pd.read_csv(r"D:\mywork\timelist.csv")
        dflen = len(df)
        for count, LidarTimeString in enumerate(df['CALIOP Lidar'], start=1):
            for file in downloadH8File(LidarTimeString):
                # `file` already starts with "YYYYMM/DD/"; avoid a doubled slash.
                ftp_filePath = "/jma/netcdf/" + file
                # Strip the 10-character directory prefix for the local name.
                Local = os.path.join(LocalDir, file[10:-3] + ".temp")
                LocalNew = os.path.join(LocalDir, file[10:])

                if not os.path.exists(LocalNew):
                    try:
                        print("Downloading the file of %s " % file[10:])
                        if _download_verified(ftp, ftp_filePath, Local, LocalNew):
                            print("The download of the file of %s has finished\n" % file[10:])
                        else:
                            ftp.debug_print("初次下载文件异常或文件丢包：The download of the file of %s not finished" % file[10:])
                    except Exception as e:
                        print(e)
                        ftp.deal_error(e)
                elif ftp.is_same_size(LocalNew, ftp_filePath):
                    print("The file of %s has already existed!\n" % file[10:])
                else:
                    # A file with the final name exists but its size does not
                    # match the server copy: discard it and fetch it again.
                    ftp.debug_print("已存在文件但丢包，The download of the file of %s has not finished：" % file[10:])
                    try:
                        print("正在尝试重新下载丢包文件: %s \n"% file[10:])
                        os.remove(LocalNew)
                        if _download_verified(ftp, ftp_filePath, Local, LocalNew):
                            ftp.debug_print("丢包文件已重新下载，The file of %s has redownload" % file[10:])
                        else:
                            ftp.debug_print("重新下载文件异常或丢包：The download of the file of %s not finished" % file[10:])
                    except Exception as ee:
                        print(ee)
                        ftp.deal_error(ee)
            print("Downloading............%d/%d"%(count, dflen))
    finally:
        # Always release the connection and the log file.
        ftp.close()
    print("下载完成！")