ERA5是欧洲中期天气预报中心(ECMWF)发布的第五代全球天气气候再分析资料,可以获取1950年至今的逐日、逐小时再分析数据。对于新手来说,在网页上申请下载的门槛较低;但如果要提交大量的下载请求,用Python脚本下载是更好的选择。
用Python脚本下载ERA5数据时,只要写好下载请求的字典,就可以一键发起请求并下载,免去在网页上操作、等待申请通过、点击下载、下载后重命名文件等一系列麻烦,这些步骤都可以用一个Python脚本完成。(运行前需先安装cdsapi,并在用户主目录下的.cdsapirc文件中配置好CDS的API密钥。)
根据网上找的脚本改了一下,实现根据时间、区域、变量、气压层搭配下载,只要根据需求自己改下文件命名规则和需要的时间、区域、变量、气压层就可以了(注释有提示)
单层数据(single-level)
from queue import Queue
from threading import Thread
import cdsapi
from time import time,strftime,localtime
import datetime
import os
from dateutil.relativedelta import relativedelta
from requests import request
# Switch the working directory at import time. The file names used by the
# download code below are relative, so files are written into this folder.
# Adjust the path to your own download folder; os.chdir raises if it does not exist.
# os.chdir("E:\\ResearchData\\20190527\\ERA5")
os.chdir(r"E:\ResearchData\download")
# Build the output file name (adapt to your needs)
def set_filename(request_dict):
    """Return the NetCDF file name for one single-level download request.

    The name has the form
    ``era5_monthly_<var>_<first year>_<last year>_<w|e>.nc``.

    Args:
        request_dict: Request dictionary with at least the keys ``'area'``
            (``[north, west, south, east]``), ``'variable'`` (a variable
            name or a list of names) and ``'year'`` (a non-empty list).

    Returns:
        The file name as a string.

    Raises:
        ValueError: If ``'variable'`` is empty.
    """
    # Short tags for the variables used in this example; extend as needed.
    abbreviations = {
        'mean_sea_level_pressure': 'mslp',
        '2m_dewpoint_temperature': 'td',
        '2m_temperature': 't',
    }

    # Hemisphere tag from the west bound. float() is used instead of int()
    # because int() truncates toward zero, so -0.5 would be tagged 'e'.
    ew = 'w' if float(request_dict['area'][1]) < 0 else 'e'

    # Accept a bare string as well as a list of variable names.
    variables = request_dict['variable']
    if isinstance(variables, str):
        variables = [variables]
    if not variables:
        raise ValueError("request_dict['variable'] must name at least one variable")

    # Variables without a short tag fall back to their full name, so an
    # unlisted variable no longer leaves `var` undefined.
    var = '_'.join(abbreviations.get(name, name) for name in variables)

    years = request_dict['year']
    return f"era5_monthly_{var}_{years[0]}_{years[-1]}_{ew}.nc"
# Download one request
def downloadonefile(request_dict):
    """Download the data described by ``request_dict`` unless the file already exists.

    The target name comes from ``set_filename``. Data is first written to
    ``<filename>.part`` and renamed only after the retrieval succeeds, so an
    interrupted transfer is never mistaken for a finished file on the next run.

    Args:
        request_dict: Request dictionary passed unchanged to ``cdsapi``.

    Raises:
        Exception: Whatever ``cdsapi`` raises on failure; the partial file is
            removed before the exception is re-raised.
    """
    filename = set_filename(request_dict)
    if os.path.isfile(filename):  # already downloaded: nothing to do
        print("ok", filename)
        return

    print(f'{filename} begin \n')
    partial = filename + '.part'

    # Drop a stale partial file left behind by a previously killed run.
    try:
        os.remove(partial)
    except FileNotFoundError:
        pass

    c = cdsapi.Client()
    try:
        c.retrieve(
            'reanalysis-era5-single-levels-monthly-means',  # change to the dataset you need
            request_dict,
            partial)
    except Exception:
        # Do not leave a truncated file behind.
        if os.path.exists(partial):
            os.remove(partial)
        raise

    # Publish the finished file under its final name.
    os.replace(partial, filename)
    print(f'\n {filename} finish \n')
# Download worker thread
class DownloadWorker(Thread):
    """Thread that takes request dictionaries from a queue and downloads each one.

    Args:
        queue: Queue whose items are request dictionaries for ``downloadonefile``.
    """

    def __init__(self, queue):
        super().__init__()
        self.queue = queue

    def run(self):
        """Process queue items forever; the thread is expected to run as a daemon."""
        while True:
            # Block until the next request dictionary is available.
            request_dict = self.queue.get()
            try:
                downloadonefile(request_dict)
            except Exception as exc:
                # Report the failure and keep the worker alive for the
                # remaining requests instead of letting the thread die.
                print(f'download failed for {request_dict!r}: {exc!r}')
            finally:
                # Always mark the item as done; otherwise queue.join()
                # would wait forever after a failed download.
                self.queue.task_done()
# Main program
def main():
    """Build all single-level download requests and run them on four worker threads.

    The period 1959-2020 is split into chunks of ``years_per_file`` years.
    One request, and therefore one file, is created for every combination of
    chunk, area and variable. The last chunk is cut off at the end year so
    that no years beyond the requested period are asked for.
    """
    # Request template; 'variable', 'year' and 'area' are filled in per request.
    request_dict = {
        'format'      : 'netcdf',
        'product_type': 'monthly_averaged_reanalysis',
        'variable'    : [],
        'year'        : [],
        'month'       : ['01', '02', '03', '04', '05', '06',
                         '07', '08', '09', '10', '11', '12'],
        'time'        : '00:00',
        # 'area'      : [60, -180, -50, -60],  # North, West, South, East. Default: global
    }

    # Wall-clock start, used for the elapsed-time report at the end.
    ts = time()

    # Requested period (adapt to your needs).
    begin = datetime.date(1959, 1, 1)
    end = datetime.date(2020, 12, 31)

    # Number of years stored in one file (adapt to your needs).
    years_per_file = 10
    delta = relativedelta(years=years_per_file)

    # First day of every chunk.
    chunk_starts = []
    d = begin
    while d <= end:
        chunk_starts.append(d)
        d += delta

    # Areas as [North, West, South, East]; here a western and an eastern band.
    areas = [[60, -180, -50, -60], [60, 30, -50, 180]]

    # One file per variable (adapt to your needs).
    variables = [['mean_sea_level_pressure'],
                 ['2m_dewpoint_temperature'],
                 ['2m_temperature']]

    # Build the list of requests.
    links = []
    for start in chunk_starts:
        # Clamp the chunk to the end year so the last chunk does not
        # request years after `end`.
        stop_year = min(start.year + years_per_file, end.year + 1)
        years = [str(y) for y in range(start.year, stop_year)]
        for area in areas:
            for variable in variables:
                links.append(dict(request_dict,
                                  year=years,
                                  area=area,
                                  variable=variable))

    # Queue shared between the main thread and the worker threads.
    queue = Queue()
    # Updated 2019-11-19: request rules, see https://cds.climate.copernicus.eu/live/limits
    # Note: the number of simultaneous requests per user is limited,
    # see https://cds.climate.copernicus.eu/vision
    # Start four worker threads.
    for _ in range(4):
        worker = DownloadWorker(queue)
        # Daemon threads do not keep the process alive once main() returns.
        worker.daemon = True
        worker.start()

    # Hand every request dictionary to the workers.
    for link in links:
        queue.put(link)

    # Wait until every queued request has been processed.
    queue.join()

    # Print the finishing time and the total elapsed time.
    timeshow = strftime('%Y-%m-%d %H:%M:%S', localtime(time()))
    print(timeshow)
    print('Took {} s'.format(time() - ts))


# Start the downloads only when the file is run as a script.
if __name__ == '__main__':
    main()
气压层数据(pressure-levels),与单层数据大同小异,只是加上了气压层这个变量,注释就不写这么详细了
from queue import Queue
from threading import Thread
import cdsapi
from time import time,strftime,localtime
import datetime
import os
from dateutil.relativedelta import relativedelta
from requests import request
# Switch the working directory at import time. The file names used by the
# download code below are relative, so files are written into this folder.
# Adjust the path to your own download folder; os.chdir raises if it does not exist.
# os.chdir("E:\\ResearchData\\20190527\\ERA5")
os.chdir(r"E:\ResearchData\download")
# Build the output file name (adapt to your needs)
def set_filename(request_dict):
    """Return the NetCDF file name for one pressure-level download request.

    The name has the form
    ``era5_monthly_<var>_<first year>-<last year>_<w|e>.nc``, where ``<var>``
    is ``hgt`` for a geopotential-only request and ``uvw`` for anything else.

    Args:
        request_dict: Request dictionary with at least the keys ``'area'``
            (``[north, west, south, east]``), ``'variable'`` and ``'year'``
            (a non-empty list).

    Returns:
        The file name as a string.
    """
    # Hemisphere tag from the west bound. float() is used instead of int()
    # because int() truncates toward zero, so -0.5 would be tagged 'e'.
    ew = 'w' if float(request_dict['area'][1]) < 0 else 'e'

    # Variable tag (adapt to your needs).
    var = 'hgt' if request_dict['variable'] == ['geopotential'] else 'uvw'

    years = request_dict['year']
    return f"era5_monthly_{var}_{years[0]}-{years[-1]}_{ew}.nc"
# Download one request
def downloadonefile(request_dict):
    """Download the data described by ``request_dict`` unless the file already exists.

    The target name comes from ``set_filename``. Data is first written to
    ``<filename>.part`` and renamed only after the retrieval succeeds, so an
    interrupted transfer is never mistaken for a finished file on the next run.

    Args:
        request_dict: Request dictionary passed unchanged to ``cdsapi``.

    Raises:
        Exception: Whatever ``cdsapi`` raises on failure; the partial file is
            removed before the exception is re-raised.
    """
    filename = set_filename(request_dict)
    if os.path.isfile(filename):  # already downloaded: nothing to do
        print("ok", filename)
        return

    print(f'{filename} begin \n')
    partial = filename + '.part'

    # Drop a stale partial file left behind by a previously killed run.
    try:
        os.remove(partial)
    except FileNotFoundError:
        pass

    c = cdsapi.Client()
    try:
        c.retrieve(
            'reanalysis-era5-pressure-levels-monthly-means-preliminary-back-extension',  # change to the dataset you need
            request_dict,
            partial)
    except Exception:
        # Do not leave a truncated file behind.
        if os.path.exists(partial):
            os.remove(partial)
        raise

    # Publish the finished file under its final name.
    os.replace(partial, filename)
    print(f'{filename} finish \n')
# Download worker thread
class DownloadWorker(Thread):
    """Thread that takes request dictionaries from a queue and downloads each one.

    Args:
        queue: Queue whose items are request dictionaries for ``downloadonefile``.
    """

    def __init__(self, queue):
        super().__init__()
        self.queue = queue

    def run(self):
        """Process queue items forever; the thread is expected to run as a daemon."""
        while True:
            # Block until the next request dictionary is available.
            request_dict = self.queue.get()
            try:
                downloadonefile(request_dict)
            except Exception as exc:
                # Report the failure and keep the worker alive for the
                # remaining requests instead of letting the thread die.
                print(f'download failed for {request_dict!r}: {exc!r}')
            finally:
                # Always mark the item as done; otherwise queue.join()
                # would wait forever after a failed download.
                self.queue.task_done()
# Main program
def main():
    """Build all pressure-level download requests and run them on four worker threads.

    The period 1950-1958 is split into chunks of ``years_per_file`` years.
    One request, and therefore one file, is created for every combination of
    chunk, area and (variable group, pressure-level list) pair. Each chunk is
    cut off at the end year, so the chunk length always matches the step
    between chunks.
    """
    # Request template; 'variable', 'pressure_level', 'year' and 'area'
    # are filled in per request.
    request_dict = {
        'format'        : 'netcdf',
        'product_type'  : 'reanalysis-monthly-means-of-daily-means',
        'variable'      : [],
        'pressure_level': [],
        'year'          : [],
        'month'         : ['01', '02', '03', '04', '05', '06',
                           '07', '08', '09', '10', '11', '12'],
        'time'          : '00:00',
        # 'area'        : [60, -180, -50, -60],  # North, West, South, East. Default: global
    }

    # Wall-clock start, used for the elapsed-time report at the end.
    ts = time()

    # Requested period (adapt to your needs).
    begin = datetime.date(1950, 1, 1)
    end = datetime.date(1958, 12, 31)

    # Number of years stored in one file (adapt to your needs).
    years_per_file = 10
    delta = relativedelta(years=years_per_file)

    # First day of every chunk.
    chunk_starts = []
    d = begin
    while d <= end:
        chunk_starts.append(d)
        d += delta

    # Areas as [North, West, South, East]; here a western and an eastern band.
    areas = [[60, -180, -50, -60], [60, 30, -50, 180]]

    # Variable groups (adapt to your needs).
    variables = [['u_component_of_wind', 'v_component_of_wind', 'vertical_velocity'],
                 ['geopotential']]

    # Pressure-level lists, paired by position with the variable groups above:
    # the wind components use the 27-level list, geopotential uses '100' and '500'.
    pressure_levels = [['100', '125', '150', '175', '200', '225', '250', '300', '350',
                        '400', '450', '500', '550', '600', '650', '700', '750', '775',
                        '800', '825', '850', '875', '900', '925', '950', '975', '1000'],
                       ['100', '500']]

    # Build the list of requests.
    links = []
    for start in chunk_starts:
        # Clamp the chunk to the end year instead of hard-coding its length.
        stop_year = min(start.year + years_per_file, end.year + 1)
        years = [str(y) for y in range(start.year, stop_year)]
        for area in areas:
            for variable, pressure_level in zip(variables, pressure_levels):
                links.append(dict(request_dict,
                                  year=years,
                                  area=area,
                                  variable=variable,
                                  pressure_level=pressure_level))

    # Queue shared between the main thread and the worker threads.
    queue = Queue()
    # Updated 2019-11-19: request rules, see https://cds.climate.copernicus.eu/live/limits
    # Note: the number of simultaneous requests per user is limited,
    # see https://cds.climate.copernicus.eu/vision
    # Start four worker threads.
    for _ in range(4):
        worker = DownloadWorker(queue)
        # Daemon threads do not keep the process alive once main() returns.
        worker.daemon = True
        worker.start()

    # Hand every request dictionary to the workers.
    for link in links:
        queue.put(link)

    # Wait until every queued request has been processed.
    queue.join()

    # Print the finishing time and the total elapsed time.
    timeshow = strftime('%Y-%m-%d %H:%M:%S', localtime(time()))
    print(timeshow)
    print('Took {} s'.format(time() - ts))


# Start the downloads only when the file is run as a script.
if __name__ == '__main__':
    main()
参考:
https://mp.weixin.qq.com/s/lOQ_6s8grFSY3TU6XcnO8w
https://github.com/jiangleads/Get_ECMWF_Data
欢迎交流
祝大家科研顺利~