ERA5数据处理(学习记录)

想要将ERA5的.nc数据中的值提取到地面站点,并整理转换为csv文件。

db_2022_08_bj12.nc 说明:db——地表;2022——年份;08——月份;bj12——北京时间12点

# 单层数据处理
import arcpy
import glob
import os
import pandas as pd
import datetime

# Extract a single-level ERA5 variable ("ssr") from every .nc file onto the
# ground-station points, export the enriched attribute table to Excel, and
# then strip the added fields so the shapefile is clean for the next file.
folder_path = 'E:/DATA/ERA5_sun/'
file_paths = glob.glob(os.path.join(folder_path, "*.nc"))

for file_path in file_paths:
    file_name = os.path.basename(file_path)
    # Station point shapefile that receives the extracted raster values.
    station_dem_shp = "E:/DATA/ERA5_surface/station.shp"
    nc_path = os.path.join(folder_path, file_name)
    # Variable candidates: d2m sp t2m u10 v10 u100 v100 — to process a
    # different one, change "ssr" here and in MakeNetCDFRasterLayer_md below.
    raster_layer = f"{file_name}_ssr"
    xls_path = os.path.join(folder_path, "table", f"{raster_layer}.xls")

    # Build an in-memory raster layer covering every time slice of the variable.
    arcpy.MakeNetCDFRasterLayer_md(nc_path, "ssr", "longitude", "latitude",
                                   raster_layer, "time", "", "BY_VALUE")
    # Sample the raster stack at each station point (adds one field per band).
    arcpy.gp.ExtractMultiValuesToPoints(station_dem_shp, raster_layer, "NONE")
    # Dump the enriched attribute table to Excel.
    arcpy.TableToExcel_conversion(station_dem_shp, xls_path, "NAME", "CODE")

    # Delete the fields just added by the extraction; the first 6 fields are
    # assumed to be the original station attributes — TODO confirm count.
    field_names = [f.name for f in arcpy.ListFields(station_dem_shp)]
    arcpy.DeleteField_management(station_dem_shp, field_names[6:])


# # 整理表格形状
# Reshape each exported Excel table into a long-format CSV: one row per
# (station, timestamp) carrying the extracted variable value.
folder_path = 'E:/DATA/ERA5_sun/table/'
file_paths = glob.glob(os.path.join(folder_path, "*.xls"))

for file_path in file_paths:
    file_name = os.path.basename(file_path)
    df = pd.read_excel(os.path.join(folder_path, file_name))

    # Columns added by ExtractMultiValuesToPoints look like "b<day>_<band>";
    # select them once and melt into (站号, 时间, value) long format.
    # (The original computed this extract twice and discarded the first result.)
    value_cols = df.columns.str.extract(r'(b\d+_.*)', expand=False).dropna()
    df1 = df.melt(['站号'], value_cols, var_name='时间')

    # File name pattern: db_<year>_<month>_bj<hour>.nc_<var>.xls, e.g.
    # "db_2022_08_bj12.nc_ssr.xls" → ["db","2022","08","bj12","ssr"].
    new_name = file_name.replace(".xls", "").replace(".nc", "")
    parts = new_name.split("_")

    dff = pd.DataFrame()
    dff["站号"] = df1["站号"]
    # Rebuild a full timestamp "year-month-day hour:00:00": the day comes from
    # the melted column name ("b<day>_..."), the hour from the "bj<hour>" token.
    dff["监测时间"] = df1.apply(
        lambda row: (parts[1] + '-' + parts[2] + '-'
                     + row["时间"].split("_")[0].replace("b", "")
                     + " " + parts[3].replace("bj", "") + ":00:00"),
        axis=1)
    dff["监测时间"] = pd.to_datetime(dff["监测时间"])
    # Value column is named after the variable token (e.g. "ssr").
    dff[f"{parts[4]}"] = df1["value"]
    dff.to_csv(f"E:/DATA/ERA5_sun/csv/{new_name}.csv",
               encoding="gbk", index=False)

接下来同理处理37个气压层下的再分析数据:

# ### Import arcpy module
import arcpy
import glob
import os
import pandas as pd

# Extract one variable ("w") from the 37-pressure-level ERA5 reanalysis files
# onto the station points, once per pressure level, exporting one Excel table
# per (file, level) pair.
folder_path = 'E:/DATA/ERA5_37/'
file_paths = glob.glob(os.path.join(folder_path, "*.nc"))
# The 37 ERA5 pressure levels, in NetCDF dimension order.
level_dataset = ["level 1","level 2","level 3","level 5","level 7","level 10","level 20","level 30","level 50","level 70",
                     "level 100","level 125","level 150","level 175","level 200","level 225","level 250","level 300","level 350","level 400",
                     "level 450","level 500","level 550","level 600","level 650","level 700","level 750","level 775","level 800","level 825",
                     "level 850","level 875","level 900","level 925","level 950","level 975","level 1000"]
for file_path in file_paths:
    # Loop-invariant setup hoisted out of the 37-iteration inner loop.
    file_name = os.path.basename(file_path)
    nc_path = folder_path + file_name
    # Station shapefile that receives the extracted values.
    station = "E:/DATA/ERA5_37/station.shp"

    for i in range(len(level_dataset)):
        # Variable candidates: d z r t w — to switch, change "w" here, in the
        # xls name and in MakeNetCDFRasterLayer_md below.
        layer_name = f"{file_name}_w_{i}"
        # NOTE(review): the Excel file is labelled i+1 — the 1-based position
        # in level_dataset, NOT the hPa value (e.g. "_w_4" is "level 5").
        # Downstream table parsing relies on this label; kept as-is.
        xls_path = folder_path + f"table/{file_name}_w_{i+1}.xls"

        # One raster layer per (time, pressure level) slice of the variable.
        arcpy.MakeNetCDFRasterLayer_md(nc_path, "w", "longitude", "latitude",
                                       layer_name, "time", level_dataset[i],
                                       "BY_VALUE")
        # Sample at each station point, then export the table to Excel.
        arcpy.gp.ExtractMultiValuesToPoints_sa(station, layer_name, "NONE")
        arcpy.TableToExcel_conversion(station, xls_path, "NAME", "CODE")

        # Delete the freshly added fields; the first 6 are assumed to be the
        # original station attributes — TODO confirm count.
        field_names = [f.name for f in arcpy.ListFields(station)]
        arcpy.DeleteField_management(station, field_names[6:])




# # 表格整理 更全一点  有改动
import glob
import os
import pandas as pd
import datetime as dt
from datetime import datetime
import time

# Reshape each per-level Excel table into a long-format CSV with a full
# timestamp and the pressure-level index attached.
folder_path = 'E:/DATA/ERA5_37/table/'
file_paths = glob.glob(os.path.join(folder_path, "*.xls"))

for file_path in file_paths:
    file_name = os.path.basename(file_path)
    # Name pattern: <year>_<month>_bj<hour>.nc_<var>_<levelIndex>.xls
    date_split = file_name.replace(".xls", "").split("_")
    # 1-based index into the 37-level list (NOT the hPa value).
    qiyaceng = date_split[4]
    # Partial date "year-month"; the day is appended per row below.
    date = date_split[0] + "-" + date_split[1]

    df = pd.read_excel(folder_path + file_name)
    # Melt the "b<day>_<band>" extraction columns into long format; the value
    # column is named after the variable token (e.g. "w").
    df1 = df.melt(['站号'],
                  df.columns.str.extract(r'(b\d+_.*)', expand=False).dropna(),
                  var_name='时间', value_name=f'{date_split[3]}')
    df1["过渡时间"] = df1.apply(
        lambda row: date + "-" + row['时间'].split("_")[0], axis=1)

    # Beijing hour token, e.g. "bj08" → "08".
    hour = date_split[2].split(".")[0].replace("bj", "")
    is_midnight = hour == '00'
    if is_midnight:
        # Hour 00 is recorded as "24:00:00" of the same day and deliberately
        # left as a plain string (it is not parseable as a datetime).
        hour = '24'
    # Single timestamp build shared by both cases (was duplicated verbatim
    # in each branch of the original if/else).
    df1["监测时间"] = df1.apply(
        lambda row: row["过渡时间"].replace("b", "") + " " + hour + ":00:00",
        axis=1)
    if not is_midnight:
        # NOTE(review): the +1 day presumably maps the file's UTC day onto the
        # Beijing-time observation day — confirm against the download setup.
        df1["监测时间"] = (pd.to_datetime(df1["监测时间"],
                                      format='%Y-%m-%d %H:%M:%S')
                       + pd.to_timedelta(1, unit='D'))

    df1["气压层级"] = qiyaceng
    new_name = file_name.replace(".xls", "")
    df1.to_csv(f"E:/DATA/ERA5_37/csv/{new_name}.csv", encoding="gbk")

得到最后整理样式:

降水数据CMORPH爬取:

import re
from bs4 import BeautifulSoup
import urllib.request
import ssl

# Download hourly 0.25-degree CMORPH precipitation files for Jun-Oct 2022
# from NOAA NCEI, scraping one directory listing per day.
mon = ["06", "07", "08", "09", "10"]
for month in mon:
    days = [f"{d:02d}" for d in range(1, 31)]
    if month in ("07", "08", "10"):
        days.append("31")  # 31-day months within Jun-Oct
    for day in days:
        TR_url = f'https://www.ncei.noaa.gov/data/cmorph-high-resolution-global-precipitation-estimates/access/hourly/0.25deg/2022/{month}/{day}/'
        listing = urllib.request.urlopen(TR_url).read().decode('ascii')
        soup = BeautifulSoup(listing, 'lxml')

        # Escape the dot so we match ".nc" literally; the original ".nc"
        # pattern matched any character followed by "nc".
        url_list = soup.find_all(href=re.compile(r"\.nc"))
        print('step1 has finished!')

        # NOTE(review): the first match is skipped — presumably a non-file
        # link in the listing; verify it never drops a real data file.
        urls = [TR_url + a.get('href') for a in url_list[1:]]
        print('step2 has finished!')
        print(urls)

        for url in urls:
            file_name = "E:/DATA/降水/" + url.split('/')[-1]
            urllib.request.urlretrieve(url, file_name)

        print('congratulations! you got them all!')

  • 2
    点赞
  • 18
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值