读取PANDAS文件
# Load the yearly-precipitation table; the first CSV column becomes the row
# index (presumably year labels — confirm against the file's layout).
PRE_FRAME=pd.read_csv(r"K:\02 WETHER_SITE\05 RIGHT\04 YEARLY\04 PRE.csv",index_col=0)
按照标签索引
def CV(array):
    """Coefficient of variation: population standard deviation over mean.

    Parameters
    ----------
    array : 1-D array-like of numbers. The mean must be non-zero, otherwise
        the division yields inf/nan (same as the original behavior).

    Returns
    -------
    float
        ``np.std(a) / np.mean(a)`` with numpy's default ``ddof=0``.
    """
    # Convert once (the original converted the input to an array twice and
    # rebound the function's own name as the local result).
    a = np.asarray(array, dtype=float)
    return np.std(a) / np.mean(a)
# For every weather station, print the coefficient of variation of yearly
# precipitation over each sliding 12-year window starting in 1982..1992.
for i in WEATHER_SITE:
    station = PRE_FRAME[str(i)]
    for j in range(1982, 1993):
        # .loc slicing is label-based and inclusive, so this window covers
        # the 12 yearly labels j .. j+11.
        print(CV(station.loc[j:j + 11]))
区域平均
# pip install rasterstats==0.6.1   (or: pip install rasterstats==0.10.3)
# Zonal statistics: for every CV GeoTIFF, compute the mean raster value
# inside each catchment polygon of the shapefile.
files = glob.glob(r'J:\09 BUDYKO_CHINA\01 DATA\01 CV\01 TIF\*.tif')
# Loop-invariant inputs hoisted out of the per-file loop (the original
# re-assigned them on every iteration).
shpFilePath = r"J:\09 BUDYKO_CHINA\02 SHP\CATHMENTS01.shp"
Stats_type = 'mean'
for f_in in files:
    # One dict per polygon, e.g. [{'mean': ...}, ...]; NaN cells are treated
    # as nodata.
    zonal_method = zonal_stats(shpFilePath, f_in, stats=[Stats_type], nodata=np.nan)
    print(len(zonal_method))
    pp = zonal_method
    print(pp)
水平堆叠数组
# Horizontally stack two arrays; `index1` and `tem_max1` are defined
# elsewhere (not shown in this snippet) and the result is discarded here —
# this line is a recipe, not a working statement.
np.hstack((index1,tem_max1))
DATAFRAME缺失值处理
# DataFrame missing-value recipes. NOTE(review): none of the results are
# assigned back — each call returns a new object and `housing` is unchanged.
# option 1: drop the rows whose "total_bedrooms" value is missing
housing.dropna(subset=["total_bedrooms"])
# option 2: drop the "total_bedrooms" column from the data entirely
housing.drop("total_bedrooms", axis=1)
# option 3: fill missing values with the median of "total_bedrooms"
median = housing["total_bedrooms"].median()
housing["total_bedrooms"].fillna(median)
XARRAY选择时间
# Select the 1987-12-31 time slice of variable 'n' and extract the raw
# numpy array (.data) from the resulting DataArray.
XGBoost1987=XGBoost['n'].sel(time=pd.to_datetime("1987-12-31")).data
PANDAS选择多行
# Row-selection recipes: boolean-mask indexing with .loc.
df.loc[df['column_name'] == some_value]            # column equals a value
df.loc[df['column_name'].isin(some_values)]        # column in a set of values
df.loc[df['column_name'] != some_value]            # column differs from a value
# BUG FIX: the original line was missing its closing bracket.
df.loc[~df['column_name'].isin(some_values)]       # column NOT in a set of values
df.loc[(df['column'] == some_value) & df['other_column'].isin(some_values)]
双线性插值
# Bilinear-style regridding: interpolate the model ET field onto the GLEAM
# grid (xarray .interp defaults to linear interpolation), then save to NetCDF.
olr=xr.open_dataset(r"K:\09 BUDYKO_CHINA\08 GRIDDING\GLEAM\03 NC_CLIP\\GLEAM_PERIOD 1987.nc")["Evapotranspiration"]
EVAP=xr.open_dataset(r"K:\09 BUDYKO_CHINA\09 GRIDDING_MODEL\ET_ExT.nc")
EVAP_REGRID= (EVAP.interp(lat=olr.lat.values, lon=olr.lon.values))
EVAP_REGRID.to_netcdf(r"K:\09 BUDYKO_CHINA\09 GRIDDING_MODEL\ET_GLEAM.nc")
MASK操作
# Mask a gridded dataset to the polygons of a shapefile using regionmask.
# Point PROJ at the conda env's projection database (Windows path issue).
os.environ['PROJ_LIB'] = r'H:\01 ANA\envs\GEO\Library\share\proj'
#
# NOTE(review): indentation was lost in this snippet — the lines below are
# presumably the body of this loop; confirm the intended loop extent.
for i in range(1987,2006):
ds = xr.open_dataset(r"K:\09 BUDYKO_CHINA\08 GRIDDING\GLASS\01 NC\\GLASS_PERIOD "+str(i)+".nc")
print(ds)
#### works with geopandas (presumably the python 3.8 env — confirm)
cn_shp = r'K:\09 BUDYKO_CHINA\02 SHP\EXCEPT_XIBEI_DIS_WGS.shp'
shp = gp.read_file(cn_shp)
# print(shp.crs)
# import pyproj
#
# shp.crs = pyproj.CRS.from_user_input('EPSG:4326')
# shp.set_crs(epsg=4326)
# shp.plot()
lon = ds["lon"][:]
lat = ds["lat"][:]
# Grid cells inside a polygon get that polygon's (non-negative) region id;
# cells outside every polygon get NaN.
mask = regionmask.mask_geopandas(shp, lon, lat)
print(mask.min())
# Keep only the cells that fall inside some polygon.
tmep_cn = ds.where((mask >= 0))
print(tmep_cn)
# NOTE(review): `olr` is opened but never used in this snippet.
olr = xr.open_dataset(r"K:\09 BUDYKO_CHINA\08 GRIDDING\GLEAM\03 NC_CLIP\GLEAM_PERIOD 1987.nc")
计算线性趋势
def lm_trend(x):
    """Least-squares linear trend of a 1-D yearly series.

    Parameters
    ----------
    x : 1-D array-like of values; may contain NaN.

    Returns
    -------
    tuple (slope, p)
        Slope per time step and its two-sided p-value, or ``(nan, nan)``
        when more than 25 entries are NaN (same cutoff as the original).
    """
    from scipy import stats

    x = np.asarray(x, dtype=float)
    if np.isnan(x).sum() > 25:
        return (np.nan, np.nan)
    # GENERALIZATION: regress against 1..len(x) instead of the hard-coded
    # 1..33, so the function works for any series length; identical result
    # for the original 33-long input. scipy's linregress gives the same
    # slope and two-sided slope p-value as the OLS fit it replaces.
    years = np.arange(1, len(x) + 1)
    result = stats.linregress(years, x)
    return (result.slope, result.pvalue)
或
def trend_3d(data):
    """Per-gridpoint linear trend of a DataArray along its 'time' dimension.

    Parameters
    ----------
    data : xr.DataArray with a 'time' core dimension.

    Returns
    -------
    tuple (slope, p)
        Two DataArrays (time dimension removed): the linear-trend slope and
        its p-value. Gridpoints whose series contains any NaN get
        ``(nan, nan)``.
    """
    import xarray as xr
    import numpy as np
    from scipy import stats

    def _trend_1d(series):
        # BUG FIX: the original tested `np.isnan(series.any())`, which is
        # isnan of a boolean and therefore always False, so the NaN guard
        # never fired; and the guarded branch returned None, which breaks
        # apply_ufunc's two-output contract.
        if np.isnan(series).any():
            return np.nan, np.nan
        slope, intercept, r, p, stderr = stats.linregress(
            range(len(series)), series)
        return slope, p

    slope_da, p_da = xr.apply_ufunc(
        _trend_1d, data,
        input_core_dims=[['time']],
        output_core_dims=[[], []],
        vectorize=True)
    return slope_da, p_da
计算相关系数
import xarray as xr
import numpy as np
import warnings
# NOTE(review): blanket suppression hides ALL warnings module-wide,
# including pearsonr's constant-input warnings — confirm this is deliberate.
warnings.filterwarnings( "ignore" )
from scipy import stats
def corr_3d(x, y):
    """Pointwise Pearson correlation between two (time, lat, lon) arrays.

    Parameters
    ----------
    x, y : xr.DataArray shaped (time, lat, lon) with 'lat'/'lon' coords
        (assumed — TODO confirm against the callers).

    Returns
    -------
    tuple (corr, pval)
        2-D DataArrays (dims 'latitude'/'longitude') holding the Pearson r
        and two-sided p-value; NaN wherever either input series has
        missing data.
    """
    nlat = y.shape[1]
    nlon = y.shape[2]
    corr = np.full([nlat, nlon], np.nan)
    pval = np.full([nlat, nlon], np.nan)
    for i in range(nlat):
        for j in range(nlon):
            xs = x[:, i, j]
            ys = y[:, i, j]
            # ROBUSTNESS FIX: the original inspected only time step 0, so
            # NaNs later in a series slipped through to pearsonr; check the
            # whole series instead.
            if np.isnan(xs).any() or np.isnan(ys).any():
                continue
            r, p = stats.pearsonr(xs, ys)
            corr[i, j] = r
            pval[i, j] = p
    lat = y.coords['lat']
    lon = y.coords['lon']
    corr = xr.DataArray(corr, coords=[lat, lon], dims=['latitude', 'longitude'])
    pval = xr.DataArray(pval, coords=[lat, lon], dims=['latitude', 'longitude'])
    return corr, pval
# Driver: correlate variable 'X' of var.nc with 'tmp' of tmp.nc and save the
# correlation and p-value grids as NetCDF.
var = xr.open_dataset('./var.nc')
tmp = xr.open_dataset('./tmp.nc')
corr,pval=corr_3d(var['X'],tmp['tmp'])
corr.to_netcdf('./V_tmp_corr.nc')
pval.to_netcdf('./V_tmp_pva.nc')
计算hurst指数
Hurst 指数 H 是定量描述时间序列自相似性与长程依赖性的有效方法。H 取值为 0~1:当 H = 0.5 时,时间序列为相互独立、方差有限的随机序列;当 0.5 < H < 1 时,表明时间序列变化具有持续性,未来的变化将与过去的变化趋势相一致;当 0 < H < 0.5 时,表明时间序列具有反持续性,即过去的变化不具有可持续性。
import numpy as np
from osgeo import gdal
import os
"""
计算hurst指数
"""
def Hurst(x):
    """Hurst exponent of a 1-D series via rescaled-range (R/S) analysis.

    Parameters
    ----------
    x : 1-D numpy array of the (cumulative) series; length n >= 4 in
        practice so the final log-log fit has points to work with.

    Returns
    -------
    float
        Slope of log(R/S) against log(window length).
        H ~ 0.5: independent increments; H > 0.5: persistent;
        H < 0.5: anti-persistent.
    """
    n = x.shape[0]
    # Step 1: first differences of the series.
    t = np.diff(x.astype(float))
    # Step 2: running mean of the differences, mt[i] = mean(t[:i+1]).
    mt = np.cumsum(t) / np.arange(1, n)
    # Step 3: per window length i, range of cumulative deviations (R) and
    # standard deviation (S). PERFORMANCE FIX: the original rebuilt the
    # cumulative-deviation series with an O(window) inner Python loop
    # (O(n^3) overall); np.cumsum applies the same recurrence directly.
    r = np.empty(n - 1)
    s = np.empty(n - 1)
    for i in range(1, n):
        dev = t[:i] - mt[i - 1]
        cum = np.cumsum(dev)
        r[i - 1] = cum.max() - cum.min()
        s[i - 1] = np.sqrt(np.mean(dev ** 2))
    # Step 4: fit log(R/S) vs log(window length); the first window is
    # skipped because r == s == 0 there would give log(0/0).
    xdata = np.log(np.arange(2, n))
    ydata = np.log(r[1:] / s[1:])
    h, b = np.polyfit(xdata, ydata, 1)
    return h
合并表格
# Concatenate each model's train / validation / test predictions into one
# continuous table (rows renumbered by ignore_index).
import pandas as pd
import numpy as np
MODELNAME=["01 MLR","01 MLR_02","02 SVM","03 ANN","04 RF","05 ExT","06 XGBoost","07 LIGHTGBM","08 CAT"]
# NOTE(review): DATANAME is defined but never used in this snippet.
DATANAME=["_TRAIN","_VALI","_TEST"]
# NOTE(review): indentation was lost — the lines below are presumably the
# body of this loop; confirm the intended loop extent.
for i in MODELNAME:
path = r"K:\09 BUDYKO_CHINA\09 GRIDDING_MODEL\\"
train_predict=pd.read_csv(path+i+"\\train_predict.csv",index_col=0)
vali_predict=pd.read_csv(path+i+"\\vali_predict.csv",index_col=0)
test_predict=pd.read_csv(path+i+"\\test_predict.csv",index_col=0)
# Stack train + validation first, then append test.
ALL_predict=pd.concat([train_predict,vali_predict], ignore_index=True)
ALL1_predict=pd.concat([ALL_predict,test_predict], ignore_index=True)
print(ALL1_predict)
MASK操作
# Mask the GLEAM dataset to a shapefile region with regionmask, then save.
import regionmask
import xarray as xr
import geopandas as gp
import os
# NOTE(review): `i` is undefined in this snippet — presumably a year from an
# omitted surrounding loop (cf. the GLASS version above); confirm.
ds = xr.open_dataset(r"K:\09 BUDYKO_CHINA\08 GRIDDING\GLEAM\01 NC\\GLEAM_PERIOD "+str(i)+".nc")
print(ds)
#### works with geopandas (presumably the python 3.8 env — confirm)
cn_shp = r'K:\09 BUDYKO_CHINA\08 GRIDDING\GLEAM\SHP\ena.shp'
shp = gp.read_file(cn_shp)
lon = ds["lon"][:]
lat = ds["lat"][:]
# Cells inside a polygon get its (non-negative) region id; outside → NaN.
mask = regionmask.mask_geopandas(shp, lon, lat)
print(mask.min())
# Keep only the cells that fall inside some polygon.
tmep_cn = ds.where((mask >= 0))
print(tmep_cn)
# olr = xr.open_dataset(r"K:\09 BUDYKO_CHINA\08 GRIDDING\GLEAM\03 NC_CLIP\GLEAM_PERIOD 1987.nc")
# EVAP_REGRID = tmep_cn.interp(lat=olr.lat.values, lon=olr.lon.values)
# NOTE(review): `ds1` is undefined here, and EVAP_REGRID is computed but
# never saved (the unmasked-grid subset write below uses tmep_cn) — verify.
lon1 = ds1["lon"][:]
lat1 = ds1["lat"][:]
EVAP_REGRID=tmep_cn.sel(lat=lat1,lon=lon1)
tmep_cn.to_netcdf(r"K:\09 BUDYKO_CHINA\08 GRIDDING\GLEAM\04 CLIP_SHP\\GLEAM_PERIOD "+str(i)+".nc")
重采样
# Resampling recipe (duplicate of the bilinear-interpolation snippet above):
# interpolate the model ET field onto the GLEAM grid and save to NetCDF.
import xarray as xr
olr=xr.open_dataset(r"K:\09 BUDYKO_CHINA\08 GRIDDING\GLEAM\03 NC_CLIP\\GLEAM_PERIOD 1987.nc")["Evapotranspiration"]
EVAP=xr.open_dataset(r"K:\09 BUDYKO_CHINA\09 GRIDDING_MODEL\ET_ExT.nc")
# .interp defaults to linear interpolation onto the target lat/lon vectors.
EVAP_REGRID= (EVAP.interp(lat=olr.lat.values, lon=olr.lon.values))
EVAP_REGRID.to_netcdf(r"K:\09 BUDYKO_CHINA\09 GRIDDING_MODEL\ET_GLEAM.nc")