仿照这个代码对CMIP6中的pp数据进行降维:import xarray as xr
import numpy as np
# 1. 读取原始文件
file_path = r'E:\Except-Desktop\DC\CMIP6-download\historical-pp\pp_c2.nc'
ds = xr.open_dataset(file_path)
print(ds)
# 2. 提取并展平 lat/lon
lat_2d = ds['lat'].values
lon_2d = ds['lon'].values
lat_1d = lat_2d.flatten()
lon_1d = lon_2d.flatten()
# 3.构建新坐标
new_coords_flat = {
'grid_point': (['grid_point'], np.arange(len(lat_1d))),
'lat': (['grid_point'], lat_1d),
'lon': (['grid_point'], lon_1d),
}
if 'time' in ds.coords:
new_coords_flat['time'] = ds['time']
if 'lev' in ds.coords:
new_coords_flat['lev'] = ds['lev']
elif 'lev' in ds.dims:
new_coords_flat['lev'] = (['lev'], np.arange(ds.dims['lev']))
# 4. 构建新的变量
data_vars_flat = {}
for var_name in ds.data_vars:
var = ds[var_name]
dims = list(var.dims)
new_dims = []
for d in dims:
if d == 'nlat' or d == 'nlon':
if 'grid_point' not in new_dims:
new_dims.append('grid_point')
else:
new_dims.append(d)
data = var.values
if 'nlat' in var.dims and 'nlon' in var.dims:
nlat_pos = var.dims.index('nlat')
nlon_pos = var.dims.index('nlon')
shape = list(data.shape)
shape[nlat_pos:nlon_pos+1] = [shape[nlat_pos] * shape[nlon_pos]]
data = data.reshape(shape)
data_vars_flat[var_name] = (new_dims, data, var.attrs)
# 5. 创建展平后的数据集
flat_ds = xr.Dataset(data_vars=data_vars_flat, coords=new_coords_flat)
flat_ds.attrs = ds.attrs
# 6. 进行重构:从 grid_point -> lat/lon 二维结构
lat_1d_flat = flat_ds['lat'].values
lon_1d_flat = flat_ds['lon'].values
lat_unique = np.unique(lat_1d_flat)
lon_unique = np.unique(lon_1d_flat)
# 建立 (lat, lon) 到 grid_point 的映射
lat_lon_to_point = {}
for idx, (lat, lon) in enumerate(zip(lat_1d_flat, lon_1d_flat)):
lat_lon_to_point[(lat, lon)] = idx
# 准备新坐标
new_coords_restored = {
'time': (['time'], flat_ds['time'].values),
'lat': (['lat'], lat_unique),
'lon': (['lon'], lon_unique),
}
# 安全处理 lev 坐标
if 'lev' in flat_ds.coords:
new_coords_restored['lev'] = (['lev'], flat_ds['lev'].values)
elif 'lev' in flat_ds.dims:
new_coords_restored['lev'] = (['lev'], np.arange(flat_ds.dims['lev']))
else:
print("Warning: 'lev' not found in dataset.")
# 重构数据
value = flat_ds['pp'].values
shape = value.shape
new_shape = (shape[0], shape[1], len(lat_unique), len(lon_unique))
reshaped = np.full(new_shape, np.nan)
for i in range(len(lat_unique)):
for j in range(len(lon_unique)):
lat_val = lat_unique[i]
lon_val = lon_unique[j]
if (lat_val, lon_val) in lat_lon_to_point:
idx = lat_lon_to_point[(lat_val, lon_val)]
reshaped[:, :, i, j] = value[:, :, idx]
# 构建数据变量
data_vars_restored = {
'pp': (['time', 'lev', 'lat', 'lon'], reshaped, flat_ds['pp'].attrs),
}
# 处理边界数据
if 'lat_bnds' in flat_ds:
lat_bnds_1d = flat_ds['lat_bnds'].values
lat_bnds = np.full((len(lat_unique), 2), np.nan)
for i in range(len(lat_unique)):
lat_val = lat_unique[i]
for lon_val in lon_unique:
if (lat_val, lon_val) in lat_lon_to_point:
idx = lat_lon_to_point[(lat_val, lon_val)]
lat_bnds[i, :] = lat_bnds_1d[idx, [0, -1]]
break
data_vars_restored['lat_bnds'] = (['lat', 'bnds'], lat_bnds)
if 'lon_bnds' in flat_ds:
lon_bnds_1d = flat_ds['lon_bnds'].values
lon_bnds = np.full((len(lon_unique), 2), np.nan)
for j in range(len(lon_unique)):
lon_val = lon_unique[j]
for lat_val in lat_unique:
if (lat_val, lon_val) in lat_lon_to_point:
idx = lat_lon_to_point[(lat_val, lon_val)]
lon_bnds[j, :] = lon_bnds_1d[idx, [0, -1]]
break
data_vars_restored['lon_bnds'] = (['lon', 'bnds'], lon_bnds)
# 创建最终数据集
final_ds = xr.Dataset(data_vars=data_vars_restored, coords=new_coords_restored)
final_ds.attrs = flat_ds.attrs
# 保存结果
output_path = r'E:\Except-Desktop\DC\CMIP6-download\historical-pp\pp_c2_restored.nc'
final_ds.to_netcdf(output_path)
print(f"转换后的文件已保存到: {output_path}")
# 打印结构
print("\n转换后数据集结构:")
print(xr.open_dataset(output_path))
pp的相关信息为:<xarray.Dataset> Size: 12GB
Dimensions: (time: 1032, lev: 15, nlat: 384, nlon: 320, d2: 2, vertices: 4)
Coordinates:
lat (nlat, nlon) float64 983kB ...
* lev (lev) float64 120B 500.0 1.5e+03 2.5e+03 ... 1.35e+04 1.45e+04
lon (nlat, nlon) float64 983kB ...
* nlat (nlat) int32 2kB 1 2 3 4 5 6 7 8 ... 378 379 380 381 382 383 384
* nlon (nlon) int32 1kB 1 2 3 4 5 6 7 8 ... 314 315 316 317 318 319 320
* time (time) object 8kB 2015-01-15 13:00:00.000008 ... 2100-12-15 12...
Dimensions without coordinates: d2, vertices
Data variables:
pp (time, lev, nlat, nlon) float32 8GB ...
time_bnds (time, d2) object 17kB ...
lat_bnds (time, nlat, nlon, vertices) float32 2GB ...
lon_bnds (time, nlat, nlon, vertices) float32 2GB ...
lev_bnds (time, lev, d2) float32 124kB ...
Attributes: (12/45)
Conventions: CF-1.7 CMIP-6.2
activity_id: ScenarioMIP
branch_method: standard
branch_time_in_child: 735110.0
branch_time_in_parent: 735110.0
case_id: 43
... ...
sub_experiment_id: none
table_id: Omon
tracking_id: hdl:21.14100/0208fb60-6f8c-40f4-a0bf-127e8e5e8052
variable_id: pp
variant_info: CMIP6 CESM2 future scenario SSP5-8.5 between 2014...
variant_label: r1i1p1f1
最新发布