ERA5数据处理(学习记录)

想要将ERA5的.nc数据中值提取到地面站点,并且整理转换为csv文件。

db_2022_08_bj12.nc 说明:db——地表;2022——年份;08——月份;bj12——北京时间12点

# 单层数据处理
import arcpy
import glob
import os
import pandas as pd
import datetime
folder_path = 'E:/DATA/ERA5_sun/'
file_paths = glob.glob(os.path.join(folder_path,"*.nc"))

for file_path in file_paths:
    file_name = os.path.basename(file_path)
    # Local variables:
    station_dem_shp = "E:/DATA/ERA5_surface/station.shp"
    station_dem_shp__2_ = station_dem_shp
    db_2022_06_bj02_nc = folder_path + file_name
    d2m_Layer = f"{file_name}_ssr" # 改
    v111_xls = folder_path + f"table/{d2m_Layer}.xls"
# d2m sp t2m u10 v10 u100 v100 
    # Process: 创建 NetCDF 栅格图层
    arcpy.MakeNetCDFRasterLayer_md(db_2022_06_bj02_nc, "ssr", "longitude", "latitude", d2m_Layer, "time", "", "BY_VALUE") # 改
    # Process: 多值提取至点
    arcpy.gp.ExtractMultiValuesToPoints(station_dem_shp, d2m_Layer, "NONE")
    # Process: 表转 Excel
    arcpy.TableToExcel_conversion(station_dem_shp__2_, v111_xls, "NAME", "CODE")
    # 获取 shp 的所有字段
    fields = arcpy.ListFields(station_dem_shp)
    # 获取所有字段的名称
    f_names = []
    for f in fields: f_names.append(f.name)
    # 通过字段名称删除字段
    arcpy.DeleteField_management(station_dem_shp, f_names[6:])


# # 整理表格形状
folder_path = 'E:/DATA/ERA5_sun/table/'
file_paths = glob.glob(os.path.join(folder_path,"*.xls"))

for file_path in file_paths:
    file_name = os.path.basename(file_path)
    file_xls = folder_path + file_name
    df = pd.read_excel(file_xls)
    df.columns.str.extract(r'(b\d+_.*)', expand=False).dropna()   # \d+-.*   b\d+_.*
    df1 = df.melt(['站号'], df.columns.str.extract(r'(b\d+_.*)', expand=False).dropna(), var_name='时间')
    
    new_name1 = file_name.replace(".xls","")
    new_name = new_name1.replace(".nc","")
    new_name_split = new_name.split("_")
    dff = pd.DataFrame()
    dff["站号"] = df1["站号"]
    dff["监测时间"] = df1.apply(lambda row: new_name_split[1]+'-'+new_name_split[2]+'-'+row["时间"].split("_")[0].replace("b","")+" "+new_name_split[3].replace("bj","")+":00:00" ,axis=1)

    # dff["监测时间"] = df1.apply(lambda row: new_name_split[1]+'-'+new_name_split[2]+'-'+'31'+" "+new_name_split[3].replace("bj","")+":00:00" ,axis=1)   # 针对5月31日
     
    # dff["监测时间"] = pd.to_datetime(dff["监测时间"]) + datetime.timedelta(1)  # 加一天
    dff["监测时间"] = pd.to_datetime(dff["监测时间"])
    dff[f"{new_name_split[4]}"] = df1["value"] 
    dff.to_csv(f"E:/DATA/ERA5_sun/csv/{new_name}.csv",encoding="gbk",index = False)

接下来同理处理37个气压层下的再分析数据:

# ### Import arcpy module
import arcpy
import glob
import os
import pandas as pd

folder_path = 'E:/DATA/ERA5_37/'
file_paths = glob.glob(os.path.join(folder_path,"*.nc"))
level_dataset = ["level 1","level 2","level 3","level 5","level 7","level 10","level 20","level 30","level 50","level 70",
                     "level 100","level 125","level 150","level 175","level 200","level 225","level 250","level 300","level 350","level 400",
                     "level 450","level 500","level 550","level 600","level 650","level 700","level 750","level 775","level 800","level 825",
                     "level 850","level 875","level 900","level 925","level 950","level 975","level 1000"]
for file_path in file_paths:
    
    for i in range(len(level_dataset)):

        file_name = os.path.basename(file_path)
        # Local variables:
        station = "E:/DATA/ERA5_37/station.shp"
        v2022_06_bj08_nc = folder_path + file_name
        # d z r t w
        
        d_Layer1 = f"{file_name}_w_{i}"  # 要改
        station__2_ = station
        v1111_xls = folder_path + f"table/{file_name}_w_{i+1}.xls" # 要改

        # Process: 创建 NetCDF 栅格图层
        arcpy.MakeNetCDFRasterLayer_md(v2022_06_bj08_nc, "w", "longitude", "latitude", d_Layer1, "time", level_dataset[i], "BY_VALUE") # 要改

        # Process: 多值提取至点
        arcpy.gp.ExtractMultiValuesToPoints_sa(station, d_Layer1, "NONE")

        # Process: 表转 Excel
        arcpy.TableToExcel_conversion(station__2_, v1111_xls, "NAME", "CODE")

        # 获取 shp 的所有字段
        fields = arcpy.ListFields(station)
        # 获取所有字段的名称
        f_names = []
        for f in fields: f_names.append(f.name)
        # 通过字段名称删除字段
        arcpy.DeleteField_management(station, f_names[6:])




# # 表格整理 更全一点  有改动
import glob
import os
import pandas as pd
import datetime as dt
from datetime import datetime
import time
# # 表格整理
folder_path = 'E:/DATA/ERA5_37/table/'
file_paths = glob.glob(os.path.join(folder_path,"*.xls"))

for file_path in file_paths:
    file_name = os.path.basename(file_path)
    date_replace = file_name.replace(".xls","")
    date_split = date_replace.split("_")
    # 获取气压层数
    qiyaceng = date_split[4]
    # 获取部分日期
    date = date_split[0]+"-"+date_split[1]
    # 新建一个数据框
    file_xls = folder_path + file_name
    df = pd.read_excel(file_xls)
    df1 = pd.DataFrame()
    # df1["站号"] = df.apply(lambda row: row['站号'],axis=1)
    df1 = df.melt(['站号'],df.columns.str.extract(r'(b\d+_.*)',expand=False).dropna(),var_name= '时间',value_name = f'{date_split[3]}')
    df1["过渡时间"] = df1.apply(lambda row: date +"-"+ row['时间'].split("_")[0],axis=1)
    # df1["过渡时间"] = df1.apply(lambda row: date +"-"+ '31',axis=1) # 5月31日
    hour_1 = date_split[2].split(".")
    hour = hour_1[0].replace("bj","")
    
    if hour == '00':
        hour = '24'
        df1["监测时间"] = df1.apply(lambda row: row["过渡时间"].replace("b","")+" "+f"{hour}"+":00:00",axis=1)
    else:
        df1["监测时间"] = df1.apply(lambda row: row["过渡时间"].replace("b","")+" "+f"{hour}"+":00:00",axis=1)
        df1["监测时间"] = pd.to_datetime(df1["监测时间"],format='%Y-%m-%d %H:%M:%S') + pd.to_timedelta(1,unit='D')
    """ 
    df1["监测时间"] = df1.apply(lambda row: row["过渡时间"].replace("b","")+" "+f"{hour}"+":00:00",axis=1)
    df1["监测时间"] = pd.to_datetime(df1["监测时间"])
    """
    
    df1["气压层级"] = qiyaceng
    new_name = file_name.replace(".xls","")
    df1.to_csv(f"E:/DATA/ERA5_37/csv/{new_name}.csv",encoding="gbk")

得到最后整理样式:

降水数据CMORPH爬取:

import re
from bs4 import BeautifulSoup
import urllib.request
import ssl
 
mon = ["06","07","08","09","10"]
for month in mon:
    days = ["01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30"]
    if month =="07" or month =="08" or month=="10":
        days.append("31")
    for day in days:
        TR_url = f'https://www.ncei.noaa.gov/data/cmorph-high-resolution-global-precipitation-estimates/access/hourly/0.25deg/2022/{month}/{day}/'
        my_url = urllib.request.urlopen(TR_url).read().decode('ascii')
        # print(my_url)
        soup = BeautifulSoup(my_url,'lxml')
        
        url_list=soup.find_all(href=re.compile(".nc"))
        print('step1 has fininshed!')
        
        urls=[]
        for i in url_list[1:]:
            urls.append(TR_url+i.get('href'))
        print ('step2 has finished!')
        print(urls)
        for i, url in enumerate(urls):
            file_name = "E:/DATA/降水/"+url.split('/')[-1]
            urllib.request.urlretrieve(url, file_name)
        
        print ('congratulations! you got them all!')

### 使用Python获取和处理ERA5每日总降水量数据 #### 安装必要的库 为了有效地获取并处理ERA5的日总降水量数据,需安装特定版本的Python库。推荐使用Anaconda环境来管理依赖项: ```bash conda create -n era5_env python=3.6 anaconda conda activate era5_env pip install cdsapi xarray netCDF4 rasterio pandas pyhdf fiona shapely gdal[^1] ``` #### 下载ERA5日总降水量数据 通过`cdsapi`可以从Copernicus Climate Data Store (CDS)下载ERA5数据。首先需要注册账号并设置API密钥。 ```python import cdsapi c = cdsapi.Client() c.retrieve( 'reanalysis-era5-single-levels', { 'product_type': 'reanalysis', 'variable': 'total precipitation', 'year': [ '2022' ], 'month': [ '01', '02', '03', # 添加更多月份... ], 'day': [ '01', '02', '03', # 添加更多日期... ], 'time': ['00:00'], 'format': 'netcdf' # 输出格式为NetCDF文件 }, 'downloaded_era5_precipitation.nc') ``` 这段脚本会请求指定年份、月份以及天数下的ERA5单层产品中的“总降水量”,并将结果保存成名为`downloaded_era5_precipitation.nc`的本地文件[^3]。 #### 处理ERA5 NetCDF数据 由于ERA5数据经过压缩存储,在读取时需要注意解码操作以获得实际物理意义上的数值。这里展示如何加载并转换这些数据至更易理解的形式: ```python import xarray as xr # 加载NetCDF文件 ds = xr.open_dataset('downloaded_era5_precipitation.nc') # 查看基本信息 print(ds) # 解码tp变量(即累积降水量),将其从米(m)转为毫米(mm),并去除缩放因子影响 precip_mm = ds['tp'] * 1e3 # 将单位由m变为mm, scale_factor已自动应用于读入过程中[^2] # 显示前几条记录作为示例输出 print(precip_mm.isel(time=slice(0, 5))) ``` 上述代码片段展示了如何打开一个NetCDF格式的ERA5数据集,并针对其中的`tp`(Total Precipitation)字段进行了简单的单位转换工作,使得最终得到的结果是以毫米计的日累计降水量值。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值