准备工作:
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import netCDF4 as nc
import numpy as np
import datetime as dt
nc文件的创建和初始化:
ncfile = nc.Dataset(r"E:\ERA5-L_frozen.nc", mode='w', format='NETCDF4')
#创建时空维度
time_dim = ncfile.createDimension('time',40)
lat_dim = ncfile.createDimension('latitude', 40)
lon_dim = ncfile.createDimension('longitude', 80)
# 创建经纬度变量
lat_var = ncfile.createVariable('latitude', np.float32, ('latitude',))
lon_var = ncfile.createVariable('longitude', np.float32, ('longitude',))
lon_range = np.arange(90.05, 98.05, 0.1) #80
lat_range = np.arange(32.05, 36.05, 0.1) #40
lat_var[:] = lat_range
lon_var[:] = lon_range
# 创建时间变量
time_var = ncfile.createVariable('time', str, ('time',))
#'f8'
# 设置时间范围和分辨率
time_var = ncfile.createVariable('time', 'str', ('time',))
time_vec = [str(year) for year in range(1979, 2019)]
time_var.units = 'yyyy'
time_vec = np.array(time_vec)
ncfile['time'][:] = time_vec #只能使用np数组赋值,list对象不能直接赋值
ncfile.close() #panoply读取未报错
文件的读取和变量的创建:
ncfile = nc.Dataset(r"E:\ERA5-L_frozen.nc", mode='r+')
#r+读写,打开现有文件
#先根据经纬度范围计算各点对应的索引,并按照顺序填入相关数据
path_t = r"E:rvic_domain.xlsx"
df_t = pd.read_excel(path_t , index_col=0 , header=0)
lon_no = round(df_t['lon_no']) #读入文件并读取索引
lat_no = round(df_t['lat_no'])
#创建有时间轴的geo2D对象
F_ERA_var = ncfile.createVariable('F_1.0', np.float32,
('time', 'latitude', 'longitude'), fill_value=-999)
T_ERA_var = ncfile.createVariable('T_1.0', np.float32,
('time', 'latitude', 'longitude'), fill_value=-999)
#为有时间轴的geo2D对象进行变量赋值
lon_no = round(df_de1pr['lon_no'])
lat_no = round(df_de1pr['lat_no'])
for i in range(1,1353):
for j in range (40):
ncfile['F_1.0'][j,lat_no[i],lon_no[i]] = df_f.iloc[i-1,j]
ncfile['T_1.0'][j,lat_no[i],lon_no[i]] = df_t.iloc[i-1,j]
------------------------------------------
#没有时间轴的geo2D对象
mask_var = ncfile.createVariable('mask', np.float32,
('latitude', 'longitude'), fill_value=-999)
#为没有时间轴的geo2D对象赋值
for i in range(1,1353):
ncfile['mask'][lat_no[i],lon_no[i]] = df_t.loc[i,'mask']
ncfile['elev'][lat_no[i],lon_no[i]] = df_t.loc[i,'elev']
ncfile['area'][lat_no[i],lon_no[i]] = df_t.loc[i,'area']
ncfile['frac'][lat_no[i],lon_no[i]] = df_t.loc[i,'frac']
为nc文件中变量添加属性:
dataset = nc.Dataset(r"E:\ts.nc", 'a')
variable = dataset.variables['area']
variable.units = 'm2'
variable2 = dataset.variables['elev']
variable2.units = 'm'
dataset.sync() # 更新文件
dataset.close() # 关闭文件
合并多个有相同坐标的nc文件并进行重采样:
file_all=[]
path_folder = r"E:\ERA5"
lst_files = os.listdir(path_folder)
for name in lst_files:
fn=os.path.join(path_folder,name)
file_all.append(fn) #得到所有文件的完整路径
ds = xr.open_mfdataset(file_all)
#合并为一个 DataSet 对象
daily_mean = ds.resample(time='1D').mean(dim='time')
#按照日期重采样,并求每日平均值
daily_mean.to_netcdf(r"E:\2mtp_daily_1979-2020.nc")
按照经纬度为某个变量赋值:
path_t = r"E:\学习\研究生学习\大论文\VIC\RVIC\latlon_1352.csv"
df_t = pd.read_csv(path_t , index_col=0 , header=0)
lon_no = round(df_t['lon_no'])
lat_no = round(df_t['lat_no'])
#经纬度索引,
import netCDF4 as nc
ncfile = nc.Dataset(r"E:\cjy_2a_runoff.nc", mode='r+')
---------------老方法,比较慢-----------------------
start_time = time.time()
for i in range(1,1353):
for j in range (days):
ncfile['Runoff'][j,lat_no[i],lon_no[i]] = df_ts.iloc[j,i-1]
end_time = time.time()
run_time = end_time - start_time
print("程序运行时间:", run_time, "秒")
(731,40,80)程序运行时间: 231.21797966957092 秒
-------------新方法,快很多-------------------------------
# 创建临时数组,与ncfile['Runoff']具有相同的维度和形状
temp_array = np.empty(ncfile['Runoff'].shape) #(731, 40, 80)
# 将所有数据一次性放入临时数组中
for i in range(1, 1353):
temp_array[:, lat_no[i], lon_no[i]] = df_ts.iloc[:, i-1]
# 使用切片操作将整个临时数组一次性写入NetCDF文件
ncfile['Runoff'][:] = temp_array
程序运行时间: 5.041003227233887 秒