1、背景(我要干的事儿):
自变量(4个):tem、pre、iet、iep
因变量(3个):SOS、EOS、LOS
目标:逐像素并行计算上述3x4=12个组合的偏相关系数
出处声明:本代码是在借鉴这篇Python多线程计算逐像元偏相关性分析文章的基础上,加入偏相关系数显著性(p值)的计算,并结合自己情况,一口气输出上述12个组合的结果。此处感谢该文大佬~
2、代码
代码分为 ‘process(函数)’ 和 ‘实际运行’ 两个.py文件在pycharm中运行(这样避免了‘溢出’报错)
Part 1-process.py
#%%
import rasterio
import numpy as np
from sklearn.linear_model import LinearRegression
from multiprocessing import Pool
import itertools
import numpy as np
import pickle
from scipy import stats
# 忽略除以零等运行时警告
np.seterr(divide='ignore', invalid='ignore')
'''
目标:计算偏相关系数+t检验显著性
输入:两个变量(x和y)在控制其他变量的情况下的偏相关系数
输出:偏相关系数+p值
'''
def calculate_partial_correlation(x, y, controls):
# 检查NaN值
if np.isnan(x).any() or np.isnan(y).any() or np.isnan(controls).any():
return np.nan
# 构建模型并预测残差
model = LinearRegression().fit(controls, y)
residuals_y = y - model.predict(controls)
model = LinearRegression().fit(controls, x)
residuals_x = x - model.predict(controls)
# 计算偏相关系数 返回一个2x2的矩阵,其中 [0, 1] 或 [1, 0] 元素则是 residuals_x 和 residuals_y 之间的相关系数。
partial_corr = np.corrcoef(residuals_x, residuals_y)[0, 1]
# 计算样本数量和控制变量数量
n = len(controls)
k = controls.shape[1]
# 计算偏相关系数的标准误差
SE_partial_corr = 1 / np.sqrt(n - k - 3)
# 计算t值
t_value = partial_corr / SE_partial_corr
# 计算双尾t检验的p值
p_value = 2 * (1 - stats.t.cdf(abs(t_value), df=n - k - 2))
return partial_corr, p_value
'''
目的:计算偏相关系数
输入:sos/eos/los/tem/pre/iet/iep-stacked三维数据
输出:sos/eos/los的偏相关系数
'''
def worker(pixel_data):
i, j, data = pixel_data
x_sos = data['sos']
x_eos = data['eos']
x_los = data['los']
y_tem = data['tem']
y_pre = data['pre']
y_iet = data['iet']
y_iep = data['iep']
# Check for NaN or infinite values in the data
invalid_data = (np.isnan(x_sos) | np.isnan(x_eos) | np.isnan(x_los) |
np.isnan(y_tem) | np.isnan(y_pre) | np.isnan(y_iet) | np.isnan(y_iep) |
np.isinf(x_sos) | np.isinf(x_eos) | np.isinf(x_los) |
np.isinf(y_tem) | np.isinf(y_pre) | np.isinf(y_iet) | np.isinf(y_iep))
if invalid_data.any():
# Skip this data point and return None for all correlations
return (i, j) + (None,) * 12
controls_tem = np.column_stack([y_pre, y_iet, y_iep])
controls_pre = np.column_stack([y_tem, y_iet, y_iep])
controls_iet = np.column_stack([y_tem, y_pre, y_iep])
controls_iep = np.column_stack([y_tem, y_pre, y_iet])
# Check for NaN or infinite values in the control variables
invalid_controls = (np.isnan(controls_tem) | np.isnan(controls_pre) |
np.isnan(controls_iet) | np.isnan(controls_iep) |
np.isinf(controls_tem) | np.isinf(controls_pre) |
np.isinf(controls_iet) | np.isinf(controls_iep))
if invalid_controls.any():
# Skip this data point and return None for all correlations
return (i, j) + (None,) * 12
#sos
sos_tem_corr,sos_tem_p = calculate_partial_correlation(x_sos, y_tem, controls_tem)
sos_pre_corr,sos_pre_p = calculate_partial_correlation(x_sos, y_pre, controls_pre)
sos_iet_corr,sos_iet_p = calculate_partial_correlation(x_sos, y_iet, controls_iet)
sos_iep_corr,sos_iep_p = calculate_partial_correlation(x_sos, y_iep, controls_iep)
# eos
eos_tem_corr, eos_tem_p = calculate_partial_correlation(x_eos, y_tem, controls_tem)
eos_pre_corr, eos_pre_p = calculate_partial_correlation(x_eos, y_pre, controls_pre)
eos_iet_corr, eos_iet_p = calculate_partial_correlation(x_eos, y_iet, controls_iet)
eos_iep_corr, eos_iep_p = calculate_partial_correlation(x_eos, y_iep, controls_iep)
# los
los_tem_corr, los_tem_p = calculate_partial_correlation(x_los, y_tem, controls_tem)
los_pre_corr, los_pre_p = calculate_partial_correlation(x_los, y_pre, controls_pre)
los_iet_corr, los_iet_p = calculate_partial_correlation(x_los, y_iet, controls_iet)
los_iep_corr, los_iep_p = calculate_partial_correlation(x_los, y_iep, controls_iep)
return (i, j, \
sos_tem_corr, sos_pre_corr, sos_iet_corr, sos_iep_corr, \
eos_tem_corr, eos_pre_corr, eos_iet_corr, eos_iep_corr, \
los_tem_corr, los_pre_corr, los_iet_corr, los_iep_corr, \
sos_tem_p, sos_pre_p, sos_iet_p, sos_iep_p, \
eos_tem_p, eos_pre_p, eos_iet_p, eos_iep_p, \
los_tem_p, los_pre_p, los_iet_p, los_iep_p)
Part 2-实际运行.py
#%%
import rasterio
from sklearn.linear_model import LinearRegression
from multiprocessing import Pool
import itertools
import numpy as np
import pickle
import os
import sys
from PIL import Image
import matplotlib.pyplot as plt
from tifffile import imread, imwrite
from osgeo import gdal
import os
from tqdm import tqdm
lib_path = r'D:\code\MCD12Q2.006_01-19'#process.py所在文件夹路径
sys.path.append(os.path.abspath(lib_path))
from partial_corr_process import worker, calculate_partial_correlation
# 忽略除以零等运行时警告
np.seterr(divide='ignore', invalid='ignore')
#%%
'''
目的:读取栅格数据
输入:文件路径
输出:读取数组、图像元数据,例如数据类型、数据范围等
'''
def read_raster(file_path):
with rasterio.open(file_path) as src:
return src.read(1), src.meta
#%%
#主函数
def main():
# 读取数据
years = range(2013, 2020)
# 文件路径-字典
file_paths = {
var: [f'{var}\\data_{var}_{year}.tif' for year in years]
for var in ['sos', 'eos', 'los', 'tem', 'pre', 'iet', 'iep']}
#print('file_paths', file_paths)
# 读取tif图像-字典
rasters = {var: np.array([read_raster(fp)[0] for fp in fps]) for var, fps in file_paths.items()}
#print('rasters', rasters)
# 获取数据的形状
min_shape = tuple(min(sizes) for sizes in zip(*(r.shape for r in [rasters['sos'], rasters['iet']])))
# 准备工作数据
'''
这行代码的目的是创建一个名为 pixel_data 的列表,其中包含像素数据的元组;
每个元组包括 (i, j, data),其中 i 和 j 是像素的坐标,data 是一个字典,包含来自 rasters 字典的子数组;
每个 v 数组中提取的,k 是字典中的键。
'''
pixel_data = [(i, j, {k: v[:, i, j] for k, v in rasters.items()}) for i, j in
itertools.product(range(min_shape[1]), range(min_shape[2]))]#注意min_shape[n]中n的取值,是否对应长和宽维度
# 初始化结果矩阵
#corrs
sos_partial_corrs = {
'sos_tem': np.full(min_shape[1:], float('nan')),
'sos_pre': np.full(min_shape[1:], float('nan')),
'sos_iet': np.full(min_shape[1:], float('nan')),
'sos_iep': np.full(min_shape[1:], float('nan'))
}
eos_partial_corrs = {
'eos_tem': np.full(min_shape[1:], float('nan')),
'eos_pre': np.full(min_shape[1:], float('nan')),
'eos_iet': np.full(min_shape[1:], float('nan')),
'eos_iep': np.full(min_shape[1:], float('nan'))
}
los_partial_corrs = {
'los_tem': np.full(min_shape[1:], float('nan')),
'los_pre': np.full(min_shape[1:], float('nan')),
'los_iet': np.full(min_shape[1:], float('nan')),
'los_iep': np.full(min_shape[1:], float('nan'))
}
#ps
sos_partial_ps = {
'sos_tem': np.full(min_shape[1:], float('nan')),
'sos_pre': np.full(min_shape[1:], float('nan')),
'sos_iet': np.full(min_shape[1:], float('nan')),
'sos_iep': np.full(min_shape[1:], float('nan'))
}
eos_partial_ps = {
'eos_tem': np.full(min_shape[1:], float('nan')),
'eos_pre': np.full(min_shape[1:], float('nan')),
'eos_iet': np.full(min_shape[1:], float('nan')),
'eos_iep': np.full(min_shape[1:], float('nan'))
}
los_partial_ps = {
'los_tem': np.full(min_shape[1:], float('nan')),
'los_pre': np.full(min_shape[1:], float('nan')),
'los_iet': np.full(min_shape[1:], float('nan')),
'los_iep': np.full(min_shape[1:], float('nan'))
}
# 并行处理每个像素的数据。每处理1000个像素,打印一次进度。
with Pool() as pool:
for idx, result in enumerate(pool.imap(worker, pixel_data), 1):
i, j, \
sos_tem_corr, sos_pre_corr, sos_iet_corr, sos_iep_corr, \
eos_tem_corr, eos_pre_corr, eos_iet_corr, eos_iep_corr, \
los_tem_corr, los_pre_corr, los_iet_corr, los_iep_corr, \
sos_tem_p, sos_pre_p, sos_iet_p, sos_iep_p, \
eos_tem_p, eos_pre_p, eos_iet_p, eos_iep_p, \
los_tem_p, los_pre_p, los_iet_p, los_iep_p = result # 调用worker函数输出
#输出corr
# sos
sos_partial_corrs['sos_tem'][i, j] = sos_tem_corr # 从上一步输出的字典中提取并储存
sos_partial_corrs['sos_pre'][i, j] = sos_pre_corr
sos_partial_corrs['sos_iet'][i, j] = sos_iet_corr
sos_partial_corrs['sos_iep'][i, j] = sos_iep_corr
# eos
eos_partial_corrs['eos_tem'][i, j] = eos_tem_corr # 从上一步输出的字典中提取并储存
eos_partial_corrs['eos_pre'][i, j] = eos_pre_corr
eos_partial_corrs['eos_iet'][i, j] = eos_iet_corr
eos_partial_corrs['eos_iep'][i, j] = eos_iep_corr
# los
los_partial_corrs['los_tem'][i, j] = los_tem_corr # 从上一步输出的字典中提取并储存
los_partial_corrs['los_pre'][i, j] = los_pre_corr
los_partial_corrs['los_iet'][i, j] = los_iet_corr
los_partial_corrs['los_iep'][i, j] = los_iep_corr
# 输出p
# sos
sos_partial_ps['sos_tem'][i, j] = sos_tem_p # 从上一步输出的字典中提取并储存
sos_partial_ps['sos_pre'][i, j] = sos_pre_p
sos_partial_ps['sos_iet'][i, j] = sos_iet_p
sos_partial_ps['sos_iep'][i, j] = sos_iep_p
# eos
eos_partial_ps['eos_tem'][i, j] = eos_tem_p # 从上一步输出的字典中提取并储存
eos_partial_ps['eos_pre'][i, j] = eos_pre_p
eos_partial_ps['eos_iet'][i, j] = eos_iet_p
eos_partial_ps['eos_iep'][i, j] = eos_iep_p
# los
los_partial_ps['los_tem'][i, j] = los_tem_p # 从上一步输出的字典中提取并储存
los_partial_ps['los_pre'][i, j] = los_pre_p
los_partial_ps['los_iet'][i, j] = los_iet_p
los_partial_ps['los_iep'][i, j] = los_iep_p
if idx % 1000 == 0:
print(f'Processed {idx} pixels')
# sos_tem/pre/iet/iep; eos_tem/pre/iet/iep; los__tem/pre/iet/iep
# 保存结果的相关设置
# 确保从正确的位置获取元数据
_, meta = read_raster(file_paths['sos'][0]) # 从任意一个与物候变量相关的文件中获取元数据
# 更新元数据的dtype和count
meta['dtype'] = 'float32' # 设置输出图像数据类型
meta['count'] = 1
#corrs
# 从结果字典中提取偏相关系数,并输出到图像
# sos
for var in ['sos_tem', 'sos_pre', 'sos_iet', 'sos_iep']:
with rasterio.open(f'{var}_partial_corr.tif', 'w', **meta) as dst:
dst.write(sos_partial_corrs[var].astype('float32'), 1)
# eos
for var in ['eos_tem', 'eos_pre', 'eos_iet', 'eos_iep']:
with rasterio.open(f'{var}_partial_corr.tif', 'w', **meta) as dst:
dst.write(eos_partial_corrs[var].astype('float32'), 1)
# los
for var in ['los_tem', 'los_pre', 'los_iet', 'los_iep']:
with rasterio.open(f'{var}_partial_corr.tif', 'w', **meta) as dst:
dst.write(los_partial_corrs[var].astype('float32'), 1)
#p-values
# 从结果字典中提取偏相关系数的p值,并输出到图像
# sos
for var in ['sos_tem', 'sos_pre', 'sos_iet', 'sos_iep']:
with rasterio.open(f'{var}_partial_p.tif', 'w', **meta) as dst:
dst.write(sos_partial_ps[var].astype('float32'), 1)
# eos
for var in ['eos_tem', 'eos_pre', 'eos_iet', 'eos_iep']:
with rasterio.open(f'{var}_partial_p.tif', 'w', **meta) as dst:
dst.write(eos_partial_ps[var].astype('float32'), 1)
# los
for var in ['los_tem', 'los_pre', 'los_iet', 'los_iep']:
with rasterio.open(f'{var}_partial_p.tif', 'w', **meta) as dst:
dst.write(los_partial_ps[var].astype('float32'), 1)
if __name__ == '__main__':
main()
完结撒花~欢迎交流~