一、Theil-Sen Median 变化趋势分析
Theil-Sen Median 方法能反映变化趋势,公式如下:
β v = M e d i a n ( v j ‾ − v i ‾ j − i ) \beta_{v}=Median\left(\frac{\overline{v_j}-\overline{v_i}}{j-i}\right) βv=Median(j−ivj−vi)
其中 v v v 表示变化的指标, v j ‾ \overline{v_j} vj 和 v i ‾ \overline{v_i} vi 表示第 j j j 年和第 i i i 年指标 f f f 的平均值。当 β v > 0 \beta_v>0 βv>0 时,表明指标变化呈改善趋势,反之降低。
二、Mann-Kendall 检验
Mann-Kendall 方法能检验变化趋势显著性,公式如下:
S
=
∑
i
=
1
n
−
1
∑
j
=
i
+
1
n
f
(
v
‾
j
−
v
‾
i
)
S=\sum_{i=1}^{n-1}{\sum_{j=i+1}^{n}{f\left(\overline v_j-\overline v_i\right)}}
S=i=1∑n−1j=i+1∑nf(vj−vi)
f
(
v
‾
j
−
v
‾
i
)
=
{
1
,
v
‾
j
−
v
‾
i
>
0
0
,
v
‾
j
−
v
‾
i
=
0
−
1
,
v
‾
j
−
v
‾
i
<
0
f\left(\overline v_j-\overline v_i\right)=\left\{\begin{matrix} 1, & \overline v_j-\overline v_i>0 \\ 0, & \overline v_j-\overline v_i=0 \\ -1, & \overline v_j-\overline v_i<0 \end{matrix}\right.
f(vj−vi)=⎩
⎨
⎧1,0,−1,vj−vi>0vj−vi=0vj−vi<0
方差: V ( S ) = n ( n − 1 ) ( 2 n + 5 ) 18 V(S)=\frac{n(n-1)(2n+5)}{18} V(S)=18n(n−1)(2n+5) ,统计量:
Z = { S − 1 V ( S ) , S > 0 0 , S = 0 S + 1 V ( S ) , S < 0 Z=\left\{\begin{matrix} \frac{S-1}{\sqrt{V(S)}}, & S>0 \\ 0, & S=0 \\ \frac{S+1}{\sqrt{V(S)}}, & S<0 \end{matrix}\right. Z=⎩ ⎨ ⎧V(S)S−1,0,V(S)S+1,S>0S=0S<0
其中 n n n 表示指标变化的时间长度, f f f 为符号函数,统计量 Z Z Z 的取值范围为 ( − ∞ , + ∞ ) \left(-\infty,+\infty\right) (−∞,+∞) ,表示在给定显著水平 α \alpha α 下,当 ∣ Z ∣ > μ 1 − α 2 \left|Z\right|>\mu_{1-\frac{\alpha}{2}} ∣Z∣>μ1−2α 时,趋势显著。
三、Hurst 指数分析
给定指标 v v v 的时间序列 v t , t = 1 , 2 , ⋯ , n v_t,t=1,2,\cdots,n vt,t=1,2,⋯,n ,对于任意正整数 u u u ,定义均值序列、累计差、极差和标准差如下:
v
u
‾
=
1
u
∑
t
=
1
u
v
t
,
u
=
1
,
2
,
⋯
,
n
\overline{v_u}=\frac{1}{u}\sum_{t=1}^u{v_t},u=1,2,\cdots,n
vu=u1t=1∑uvt,u=1,2,⋯,n
X
(
t
,
u
)
=
∑
t
=
1
u
v
t
−
v
u
,
1
≤
t
≤
u
X(t,u)=\sum_{t=1}^u{v_t-v_u},1\le t\le u
X(t,u)=t=1∑uvt−vu,1≤t≤u
R
u
=
max
1
≤
t
≤
u
X
(
t
,
u
)
,
u
=
1
,
2
,
⋯
,
n
R_u=\max_{1\le t\le u}X(t,u),u=1,2,\cdots,n
Ru=1≤t≤umaxX(t,u),u=1,2,⋯,n
S
u
=
1
u
∑
t
=
1
u
(
v
t
−
v
u
‾
)
2
S_u=\sqrt{\frac{1}{u}\sum_{t=1}^{u}\left(v_t-\overline{v_u}\right)^2}
Su=u1t=1∑u(vt−vu)2
若存在 R S ∝ u H \frac{R}{S}\propto u^H SR∝uH ,表示该时间序列的指标存在 Hurst 现象,大小用 H H H 表示,若 H = 0.5 H=0.5 H=0.5 则该指标自相关性为 0 ,即时间序列前后变化是完全随机的;当 0.5 < H < 1 0.5<H<1 0.5<H<1 ,表示该指标自相关性大于 0 ,即时间序列前后变化一致,且越接近 1 持续性能力越强;当 0 < H < 0.5 0<H<0.5 0<H<0.5 ,表示该指标时间序列自相关性小于 0 ,越接近 0 反持续性能力越强。
四、代码
import os
import numpy as np
from osgeo import gdal
def read_img(filename):
# 打开文件
dataset = gdal.Open(filename)
# 获取栅格数据的元信息
im_width = dataset.RasterXSize # 栅格矩阵的列数
im_height = dataset.RasterYSize # 栅格矩阵的行数
im_bands = dataset.RasterCount # 波段数
im_geotrans = dataset.GetGeoTransform() # 仿射矩阵,左上角像素的大地坐标和像素分辨率
im_proj = dataset.GetProjection() # 地图投影信息,字符串表示
# 读取栅格数据到数组
im_data = dataset.ReadAsArray(0, 0, im_width, im_height)
# 返回元信息、投影、仿射矩阵和数据数组
return im_proj, im_geotrans, im_data
def write_img(filename, im_proj, im_geotrans, im_data, driverName="GTiff"):
# 判断栅格数据的数据类型
if "int8" in im_data.dtype.name:
datatype = gdal.GDT_Byte
elif "int16" in im_data.dtype.name:
datatype = gdal.GDT_UInt16
else:
datatype = gdal.GDT_Float32
# 判读数组维数
if len(im_data.shape) == 3:
im_bands, im_height, im_width = im_data.shape
else:
im_bands, (im_height, im_width) = 1, im_data.shape
# 创建文件
driver = gdal.GetDriverByName(driverName)
dataset = driver.Create(filename, im_width, im_height, im_bands, datatype)
# 写入仿射变换参数
dataset.SetGeoTransform(im_geotrans)
# 写入投影
dataset.SetProjection(im_proj)
# 写入数组数据
if im_bands == 1:
dataset.GetRasterBand(1).WriteArray(im_data)
else:
for i in range(im_bands):
dataset.GetRasterBand(i + 1).WriteArray(im_data[i])
def theil_sen_slope(data):
n_timepoints, height, width = data.shape
i, j = np.triu_indices(n_timepoints, k=1) # 获取数据索引的上三角部分
index_diff = i - j
index_diff = index_diff[:, np.newaxis]
# 将数据的时间轴展平,便于进行矩阵操作
flat_data = data.reshape(n_timepoints, -1)
# 计算斜率
slope_values = (flat_data[j] - flat_data[i]) / index_diff
# 根据每列计算中位数
slopes = np.median(slope_values, axis=0)
# 将结果重新形状成与原始数据的高度和宽度相匹配的形状
slopes = slopes.reshape(height, width)
return slopes
def mann_kendall_test(data):
height, width = data.shape[1], data.shape[2]
Z_values = np.zeros((height, width))
for h in range(height):
for w in range(width):
data_values = data[:, h, w]
n = len(data_values)
i, j = np.triu_indices(n, k=1) # 获取数据索引的上三角部分
slope_values = data_values[j] - data_values[i]
# 计算 sgn(slope_j - slope_i)
sgn_values = np.sign(slope_values)
# 计算 S
S = np.sum(sgn_values)
# 计算 var(S)
var_S = (n * (n - 1) * (2 * n + 5)) / 18
# 计算 Z
if S > 0:
Z = (S - 1) / np.sqrt(var_S)
elif S < 0:
Z = (S + 1) / np.sqrt(var_S)
else:
Z = 0
Z_values[h, w] = Z
return Z_values
def Hurst(data):
from sklearn.linear_model import LinearRegression
n_timepoints, height, width = data.shape
cumsum_data = np.cumsum(data, axis=0)
divisor = np.arange(1, n_timepoints + 1)[:, np.newaxis, np.newaxis]
means = cumsum_data / divisor
i, j = np.triu_indices(n_timepoints, k=0)
point = list(zip(i, j))
X_t_tau = [[] for _ in range(n_timepoints)]
for x, y in point:
X_t_tau[y].append(cumsum_data[x] - means[y] * (x + 1))
R_tau = []
for row in X_t_tau:
tmp = np.zeros((height, width))
row = np.array(row)
for h in range(height):
for w in range(width):
tmp[h, w] = np.max(row[:, h, w]) - np.min(row[:, h, w])
R_tau.append(tmp)
R_tau = np.stack(R_tau)
cumsum_data = np.cumsum((data - means)**2, axis=0)
S_tau = (cumsum_data / divisor)**0.5
log_tau = np.log(np.arange(1, n_timepoints + 1))
log_R_S_ratio = np.log(R_tau / S_tau)
model = LinearRegression()
model.fit(log_tau.reshape(-1, 1), log_R_S_ratio.reshape(n_timepoints, -1))
fit_result = model.coef_.reshape(height, width)
# for h in range(height):
# for w in range(width):
# X_current_position = mean_log_R_S_ratio[:, h, w]
# model.fit(X_current_position, log_tau)
# fit_result[h, w] = model.coef_[0]
return fit_result
def run():
data_path = "E:/by/dataset/生态系统质量/"
out_path = "E:/by/output/生态系统质量/"
if not os.path.exists(out_path):
os.makedirs(out_path)
files = [file for file in os.listdir(data_path) if file.endswith(".tif")]
data_list = []
for file in files:
proj, geotrans, band = read_img(data_path + file)
data = np.ma.masked_where(
np.logical_or(band == np.finfo(np.float32).min, np.isnan(band)), band
)
data_list.append(data)
# 将列表转换为数组
data = np.stack(data_list, axis=0)
# theil_sen_slope
# tss = theil_sen_slope(data)
# write_img(out_path + "tss.tif", proj, geotrans, tss)
# print('theil_sen_slope finished!')
# mann_kendall_test
# mk = mann_kendall_test(data)
# write_img(out_path + "mk.tif", proj, geotrans, mk)
# print('mann_kendall_test finished!')
# Hurst
hurst = Hurst(data)
write_img(out_path + "hurst.tif", proj, geotrans, hurst)
print('hurst finished!')
if __name__ == "__main__":
run()