ERA5数据多线程下载脚本

源木求渔

已于 2022-08-10 15:03:00 修改

阅读量932

点赞数 2

分类专栏：数据处理　文章标签：python

于 2022-06-26 11:53:57 首次发布

本文链接：https://blog.csdn.net/m0_60678566/article/details/125468031

版权

数据处理专栏收录该内容

5 篇文章 4 订阅

订阅专栏

ERA5 数据是欧洲中期天气预报中心（ECMWF）发布的第五代全球天气气候再分析资料，可以获取 1950 年至今的逐日逐小时再分析资料。对于新手来说，在网页上申请下载的门槛较低；但是如果需要提交大量的下载请求，那么用 Python 脚本下载是更好的选择。

ERA5 单层月平均再分析数据：https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels-monthly-means?tab=overview

用 Python 脚本下载 ERA5 数据，只要写好了下载请求的字典，就可以一键发起请求并下载，免去在网页上操作、等待申请通过、点击下载、下载后重命名文件的一系列麻烦，这些都可以在 Python 中用一个脚本实现。

根据网上找的脚本改了一下，实现根据时间、区域、变量、气压层搭配下载，只要根据需求自己改下文件命名规则和需要的时间、区域、变量、气压层就可以了（注释有提示）

单层数据（single-level）

from queue import Queue
from threading import Thread
import cdsapi
from time import time,strftime,localtime
import datetime
import os
from dateutil.relativedelta import relativedelta
from requests import request
# Alternative download directory, kept for reference:
# os.chdir("E:\\ResearchData\\20190527\\ERA5")
# Change the working directory at import time: the download code below uses
# bare relative file names, so files are looked up and written here.
# Adjust the path to your own machine.
os.chdir(r"E:\ResearchData\download")

# 设置文件名函数（根据需求改）
def set_filename(request_dict):
    """Build the output file name for one download request.

    The naming rules are an example; adapt them to your own request layout.

    Args:
        request_dict: Request dictionary. Reads ``area`` (index 1 is used as
            the western longitude), ``variable`` (a list of variable names,
            or a single name) and ``year`` (a list; the first and last
            entries are used).

    Returns:
        A name of the form ``era5_monthly_<var>_<first>_<last>_<e|w>.nc``.
    """
    # Short tags for the variables this example downloads one per file.
    short_names = {
        'mean_sea_level_pressure': 'mslp',
        '2m_dewpoint_temperature': 'td',
        '2m_temperature': 't',
    }

    # Hemisphere tag from the western longitude. float() rather than int():
    # int() truncates toward zero, so e.g. -0.5 would be labelled 'e'.
    ew = 'w' if float(request_dict['area'][1]) < 0 else 'e'

    variable = request_dict['variable']
    names = [variable] if isinstance(variable, str) else list(variable)
    if len(names) == 1 and names[0] in short_names:
        var = short_names[names[0]]
    else:
        # Any other variable (or combination) still gets a usable name
        # instead of failing on an unassigned local.
        var = '-'.join(names) or 'unknown'

    years = request_dict['year']
    return f"era5_monthly_{var}_{years[0]}_{years[-1]}_{ew}.nc"
  



# 下载请求函数
def downloadonefile(request_dict):
    """Download the data for one request unless its file already exists.

    The data is retrieved into ``<filename>.part`` and renamed to the final
    name only after the retrieval has returned. A transfer that is
    interrupted therefore never leaves a truncated file under the final
    name, which a later run would report as "ok" and skip.

    Args:
        request_dict: CDS request dictionary; also passed to
            ``set_filename`` to derive the target file name.
    """
    filename = set_filename(request_dict)

    if os.path.isfile(filename):
        # Finished by an earlier run: nothing to do.
        print("ok", filename)
        return

    print(f'{filename} begin \n')
    partial_name = filename + '.part'
    c = cdsapi.Client()
    c.retrieve(
        'reanalysis-era5-single-levels-monthly-means',  # change to match the dataset
        request_dict,
        partial_name)
    # Publish the file under its final name only once it is complete.
    os.replace(partial_name, filename)
    print(f'\n {filename} finish \n')




#下载脚本 
class DownloadWorker(Thread):
    """Worker thread that downloads the requests it takes from a shared queue."""

    def __init__(self, queue):
        """Remember the queue to consume.

        Args:
            queue: Queue whose items are request dictionaries.
        """
        super().__init__()
        self.queue = queue

    def run(self):
        """Process queued requests forever; a failed request is reported and skipped."""
        while True:
            request_dict = self.queue.get()
            try:
                downloadonefile(request_dict)
            except Exception as exc:
                # Thread top level: report the failure and keep the worker
                # alive so the remaining requests are still processed.
                print(f'download failed: {exc!r}')
            finally:
                # Always acknowledge the item; otherwise queue.join() in the
                # main thread blocks forever after a failed download.
                self.queue.task_done()


#主程序 
def main():
    """Queue one download per (period, area, variable) and run four workers.

    The span from ``begin`` to ``end`` is cut into files of
    ``years_per_file`` years. The last file is clipped at ``end`` so that no
    year beyond the configured range is requested or used in a file name.
    When the queue is empty, the finishing time and the elapsed seconds are
    printed.
    """
    # Request template; 'year', 'area' and 'variable' are filled in per file.
    # 'area' is given as [North, West, South, East].
    request_dict = {
        'format'       : 'netcdf',
        'product_type' : 'monthly_averaged_reanalysis',
        'variable'     : [],
        'year'         : [],
        'month'        : ['01', '02', '03', '04', '05', '06',
                          '07', '08', '09', '10', '11', '12'],
        'time'         : '00:00',
    }

    # Wall-clock start, used for the elapsed-time report at the end.
    ts = time()

    # Period to download and how many years go into one file (adapt as needed).
    begin = datetime.date(1959, 1, 1)
    end = datetime.date(2020, 12, 31)
    years_per_file = 10

    # Example: western and eastern boxes are downloaded separately.
    areas = [[60, -180, -50, -60], [60, 30, -50, 180]]

    # Example: three variables, each stored in its own file.
    variables = [['mean_sea_level_pressure'],
                 ['2m_dewpoint_temperature'],
                 ['2m_temperature']]

    # Build the list of requests: period -> area -> variable.
    links = []
    for first_year in range(begin.year, end.year + 1, years_per_file):
        # Clip the final chunk so it does not run past `end`.
        last_year = min(first_year + years_per_file - 1, end.year)
        years = [str(year) for year in range(first_year, last_year + 1)]
        for area in areas:
            for variable in variables:
                links.append(dict(request_dict,
                                  year=years,
                                  area=area,
                                  variable=variable))

    # Queue shared between the main thread and the worker threads.
    queue = Queue()

    # Four workers: the number of simultaneous requests per user is limited,
    # see https://cds.climate.copernicus.eu/live/limits
    for _ in range(4):
        worker = DownloadWorker(queue)
        # Daemon threads do not keep the process alive once main() returns.
        worker.daemon = True
        worker.start()

    # Each queue item is one request dictionary.
    for link in links:
        queue.put(link)

    # Block until every queued request has been acknowledged.
    queue.join()

    # Report the finishing time and the total elapsed time.
    print(strftime('%Y-%m-%d %H:%M:%S', localtime(time())))
    print('Took {} s'.format(time() - ts))

# Start the download only when the file is run as a script, not on import.
if __name__ == '__main__':
   main()

气压层数据（pressure-levels），与单层数据大同小异，只是加上了气压层这个变量，注释就不写这么详细了

from queue import Queue
from threading import Thread
import cdsapi
from time import time,strftime,localtime
import datetime
import os
from dateutil.relativedelta import relativedelta
from requests import request
# Alternative download directory, kept for reference:
# os.chdir("E:\\ResearchData\\20190527\\ERA5")
# Change the working directory at import time: the download code below uses
# bare relative file names, so files are looked up and written here.
# Adjust the path to your own machine.
os.chdir(r"E:\ResearchData\download")

# 设置文件名函数（根据需求改）
def set_filename(request_dict):
    """Return the output file name for one pressure-level request.

    The name has the form ``era5_monthly_<var>_<first>-<last>_<e|w>.nc``.
    ``<e|w>`` comes from the sign of ``request_dict['area'][1]``, ``<var>``
    is ``hgt`` for a geopotential-only request and ``uvw`` for anything
    else, and the years are the first and last entries of
    ``request_dict['year']``. Adapt the rules to your own needs.
    """
    # Index 1 of the area is the western longitude: negative means 'w'.
    west_longitude = int(request_dict['area'][1])
    hemisphere = 'e' if west_longitude >= 0 else 'w'

    # Only the exact single-variable list maps to 'hgt'.
    tag = 'hgt' if request_dict['variable'] == ['geopotential'] else 'uvw'

    first_year = request_dict['year'][0]
    last_year = request_dict['year'][-1]
    return f"era5_monthly_{tag}_{first_year}-{last_year}_{hemisphere}.nc"
  

# 下载请求函数
def downloadonefile(request_dict):
    """Download the data for one request unless its file already exists.

    The data is retrieved into ``<filename>.part`` and renamed to the final
    name only after the retrieval has returned. A transfer that is
    interrupted therefore never leaves a truncated file under the final
    name, which a later run would report as "ok" and skip.

    Args:
        request_dict: CDS request dictionary; also passed to
            ``set_filename`` to derive the target file name.
    """
    filename = set_filename(request_dict)

    if os.path.isfile(filename):
        # Finished by an earlier run: nothing to do.
        print("ok", filename)
        return

    print(f'{filename} begin \n')
    partial_name = filename + '.part'
    c = cdsapi.Client()
    c.retrieve(
        'reanalysis-era5-pressure-levels-monthly-means-preliminary-back-extension',  # change to match the dataset
        request_dict,
        partial_name)
    # Publish the file under its final name only once it is complete.
    os.replace(partial_name, filename)
    print(f'{filename} finish \n')

    
#下载脚本 
class DownloadWorker(Thread):
    """Worker thread that downloads the requests it takes from a shared queue."""

    def __init__(self, queue):
        """Remember the queue to consume.

        Args:
            queue: Queue whose items are request dictionaries.
        """
        super().__init__()
        self.queue = queue

    def run(self):
        """Process queued requests forever; a failed request is reported and skipped."""
        while True:
            request_dict = self.queue.get()
            try:
                downloadonefile(request_dict)
            except Exception as exc:
                # Thread top level: report the failure and keep the worker
                # alive so the remaining requests are still processed.
                print(f'download failed: {exc!r}')
            finally:
                # Always acknowledge the item; otherwise queue.join() in the
                # main thread blocks forever after a failed download.
                self.queue.task_done()


#主程序 
def main():
    """Queue one download per (period, area, variable set) and run four workers.

    The span from ``begin`` to ``end`` is cut into files of
    ``years_per_file`` years. The last file is clipped at ``end``, so the
    year list follows the configured span instead of a separately
    hard-coded count. When the queue is empty, the finishing time and the
    elapsed seconds are printed.
    """
    # Request template; 'year', 'area', 'variable' and 'pressure_level' are
    # filled in per file. 'area' is given as [North, West, South, East].
    request_dict = {
        'format'        : 'netcdf',
        'product_type'  : 'reanalysis-monthly-means-of-daily-means',
        'variable'      : [],
        'pressure_level': [],
        'year'          : [],
        'month'         : ['01', '02', '03', '04', '05', '06',
                           '07', '08', '09', '10', '11', '12'],
        'time'          : '00:00',
    }

    # Wall-clock start, used for the elapsed-time report at the end.
    ts = time()

    # Period to download and how many years go into one file (adapt as needed).
    begin = datetime.date(1950, 1, 1)
    end = datetime.date(1958, 12, 31)
    years_per_file = 10

    # Areas to download separately, each as [North, West, South, East].
    areas = [[60, -180, -50, -60], [60, 30, -50, 180]]

    # Variable sets; each set is paired with the level list at the same
    # position in `pressure_levels` below.
    variables = [['u_component_of_wind', 'v_component_of_wind', 'vertical_velocity'],
                 ['geopotential']]

    # Pressure levels per variable set (same order as `variables`).
    pressure_levels = [['100', '125', '150', '175', '200', '225', '250', '300', '350',
                        '400', '450', '500', '550', '600', '650', '700', '750', '775',
                        '800', '825', '850', '875', '900', '925', '950', '975', '1000'],
                       ['100', '500']]

    # Build the list of requests: period -> area -> (variable set, levels).
    links = []
    for first_year in range(begin.year, end.year + 1, years_per_file):
        # Clip the final chunk so it does not run past `end`.
        last_year = min(first_year + years_per_file - 1, end.year)
        years = [str(year) for year in range(first_year, last_year + 1)]
        for area in areas:
            # zip() pairs the lists position by position and stops at the
            # shorter one, so keep both lists the same length.
            for variable, pressure_level in zip(variables, pressure_levels):
                links.append(dict(request_dict,
                                  year=years,
                                  area=area,
                                  variable=variable,
                                  pressure_level=pressure_level))

    # Queue shared between the main thread and the worker threads.
    queue = Queue()

    # Four workers: the number of simultaneous requests per user is limited,
    # see https://cds.climate.copernicus.eu/live/limits
    for _ in range(4):
        worker = DownloadWorker(queue)
        # Daemon threads do not keep the process alive once main() returns.
        worker.daemon = True
        worker.start()

    # Each queue item is one request dictionary.
    for link in links:
        queue.put(link)

    # Block until every queued request has been acknowledged.
    queue.join()

    # Report the finishing time and the total elapsed time.
    print(strftime('%Y-%m-%d %H:%M:%S', localtime(time())))
    print('Took {} s'.format(time() - ts))

# Start the download only when the file is run as a script, not on import.
if __name__ == '__main__':
   main()

参考：

https://mp.weixin.qq.com/s/lOQ_6s8grFSY3TU6XcnO8w
https://github.com/jiangleads/Get_ECMWF_Data

http://t.csdn.cn/BAL6R

欢迎交流

祝大家科研顺利~