本人之前的博客中介绍了批量下载ERA5数据的方法,在这里再次对方法进行优化,采用多进程下载,使用 Python 的 multiprocessing 模块。
可以根据需求选择相应的参数、空间范围、分辨率、时间等等。
__author__ = "LNH"
import cdsapi
import numpy as np
import calendar
import os
from multiprocessing import Process
def download(c, var, year, month, day, h, area):
    """Retrieve one hour of ERA5 single-level data and write it to a NetCDF file.

    Blocks until the CDS request completes, then saves the file in the
    current working directory as ``era5__single_levels_YYYYMMDDHH.nc``.

    Parameters
    ----------
    c : cdsapi.Client
        Authenticated CDS API client.
    var : str or list of str
        ERA5 variable name(s) to download.
    year, month, day : str
        Date components; month/day may be unpadded ("6" as well as "06").
    h : str
        Hour in "HH:00" form; its first two characters go into the filename.
    area : list
        Bounding box [north, west, south, east] in degrees.

    Returns
    -------
    int
        Always 0 (``retrieve`` raises on failure).
    """
    # Zero-pad month/day so the output filenames sort chronologically.
    m = str(month).zfill(2)
    d = str(day).zfill(2)
    c.retrieve(
        'reanalysis-era5-single-levels',
        {
            'variable': var,
            'product_type': 'reanalysis',
            'year': year,
            'month': month,
            'day': day,
            'time': h,
            'area': area,
            'format': 'netcdf',
            'grid': '0.25/0.25',
        },
        # NOTE: the double underscore in the name is kept for compatibility
        # with files already produced by earlier runs of this script.
        'era5_' + '_single_levels_' + year + m + d + h[0:2] + '.nc')
    return 0
if __name__ == '__main__':
    # Configuration summary:
    #   var:                    variables to download
    #   url / key:              CDS account credentials
    #   yearstart..dayend:      inclusive date range to download
    #   area:                   bounding box [north, west, south, east]
    #   grid:                   resolution, e.g. "0.25/0.25" (set in download())

    # Directory in which the downloaded files are stored.
    os.chdir("F:/")

    # CDS account credentials — fill in your own
    # (url is normally "https://cds.climate.copernicus.eu/api/v2").
    c = cdsapi.Client(url="XXXXXXXXXXXXXXXXXXXXXXXXX",
                      key="XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")

    # Single-level variables downloaded by this script.
    var = ['10m_u_component_of_wind',
           '10m_v_component_of_wind',
           'Total_cloud_cover',
           'Total_column_ozone',
           'Total_column_water',
           'Total_column_water_vapour',
           '2m_temperature',
           'Skin_temperature',
           'Sea_surface_temperature',
           'Surface_pressure']
    # Pressure-level variables (currently unused; kept for reference).
    var1 = ['fraction_of_cloud_cover',
            'specific_cloud_liquid_water_content',
            'Ozone_mass_mixing_ratio',
            'Specific_humidity',
            'Temperature',
            'u_component_of_wind',
            'v_component_of_wind',
            'geopotential']
    var2 = ["orography"]

    # Inclusive download period.
    yearstart = 2003
    yearend = 2003
    monthstart = 6
    monthend = 6
    daystart = 7
    dayend = 31

    # Spatial limits of the download (here: East Asia).
    lon1 = 100.0
    lon2 = 150.0
    lat1 = 0.0
    lat2 = 50.0

    years = [str(y) for y in range(yearstart, yearend + 1)]
    area = [lat2, lon1, lat1, lon2]  # [north, west, south, east]

    for year in years:
        y = int(year)
        # BUG FIX: the original compared the *string* `year` against the
        # *int* yearstart/yearend, which is always False in Python 3, so the
        # partial first/last years of a multi-year range downloaded all
        # twelve months instead of only the requested ones.
        if y == yearstart and y == yearend:
            months = [str(mn) for mn in range(monthstart, monthend + 1)]
        elif y == yearstart:
            months = [str(mn) for mn in range(monthstart, 13)]
        elif y == yearend:
            months = [str(mn) for mn in range(1, monthend + 1)]
        else:
            months = [str(mn) for mn in range(1, 13)]

        for month in months:
            mo = int(month)
            # Number of days in this calendar month (handles leap years).
            days_in_month = calendar.monthrange(y, mo)[1]
            if y == yearstart and y == yearend and mo == monthstart and mo == monthend:
                # BUG FIX: clamp dayend to the month length; the original
                # requested nonexistent days (e.g. June 31) when dayend
                # exceeded the month, which makes the CDS request fail.
                days = [str(dd) for dd in range(daystart, min(dayend, days_in_month) + 1)]
            elif y == yearstart and mo == monthstart:
                days = [str(dd) for dd in range(daystart, days_in_month + 1)]
            elif y == yearend and mo == monthend:
                days = [str(dd) for dd in range(1, min(dayend, days_in_month) + 1)]
            else:
                days = [str(dd) for dd in range(1, days_in_month + 1)]

            for day in days:
                # One process per hour (00:00 .. 23:00); join them all before
                # moving on so at most 24 CDS requests are in flight at once.
                pro_list = []
                for hour in range(24):
                    h = '{:02d}:00'.format(hour)
                    p = Process(target=download,
                                args=(c, var, year, month, day, h, area))
                    p.start()
                    pro_list.append(p)
                for p in pro_list:
                    p.join()
                print("主进程结束!")