根据slope影像的数值,设定步长为0.005,然后统计所有影像在对应范围下的结果,以此进行绘箱线图。
import numpy as np
from osgeo import gdal
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from matplotlib import colors
from scipy import optimize, stats
# 读取tif格式的栅格影像数据,并将特殊值替换为NaN
def read_tif(filename):
dataset = gdal.Open(filename)
data = dataset.ReadAsArray().flatten()
data = np.where((data == -9999.000000) | (data == -3.402823e+38), np.nan, data)
return data
# 读取tif格式的栅格影像数据,并将特殊值替换为NaN
def read_vel(filename):
dataset = gdal.Open(filename)
data = dataset.ReadAsArray().flatten()
data = np.where((data > 0) | (data < -3.4028235e+37), np.nan, data)
return data
# 读取P_value格式的栅格影像数据,并将未通过显著检验的值替换为NaN
def read_pvalue(filename):
dataset = gdal.Open(filename)
data = dataset.ReadAsArray().flatten()
data = np.where((data > 0.01), np.nan, data) ##p检验值
return data
def read_tif2(filename):
dataset = gdal.Open(filename)
data = dataset.ReadAsArray().flatten()
data = np.where((data <= 0) | (data == -3.402823e+38), np.nan, data)
return data
# 定义拟合函数,这里使用简单的线性拟合
def linear_func(x, a, b):
return a * x + b
# 需要拟合的函数
def f_1(x, A, B):
return A * x + B
def rmse2(x, y):
RMSE=np.sqrt(((x - y) ** 2).mean())
return RMSE
# 读取两幅影像数据
data1 = read_tif('slopes_year_C.tif')
data2 = read_vel('velocity_null.tif')
data3 = read_pvalue('p_values_year_C.tif')
data4 = read_tif2('DEM2.tif')
# 合并成DataFrame,并去除空值
df = pd.DataFrame({'slope': data1, 'velocity': data2, 'p_values':data3, 'ele':data4})
df.dropna(inplace=True)
print(df['slope'].min())
print(df['slope'].max())
# 定义划分步长和划分数量
step = 0.005
num_bins = 60
bins = {} # 创建一个空字典来存储划分后的子集
start_0 = 0.08 # 初始化划分范围起始值
start = 0.08 # 初始化划分范围起始值
# 根据步长和划分数量生成子集
for i in range(num_bins):
# 计算划分范围的结束值
end = start + step
print(end)
# 创建子集的名称,如 VIC1、VIC2、VIC3 等
bin_name = f'VIC{i+1}'
print(bin_name)
# 根据划分范围筛选数据
bins[bin_name] = df[(df['slope'] > start) & (df['slope'] < end)]
# print(bins[bin_name])
# 更新起始值为下一个范围的起始值
start = end
# 处理最后一个子集(大于等于最后一个范围起始值的数据)
bins[f'VIC{num_bins+1}'] = df[df['slope'] >= start]
# 打印每个子集的数据量
for bin_name, bin_df in bins.items():
print(f'{bin_name}: {len(bin_df)} data points')
# 生成箱线图的标签
labels = [f'{start_0 + i * step:.2f}~{start_0 + (i + 1) * step:.2f}' for i in range(num_bins)] + [f'>{start_0 + num_bins * step:.2f}']
print(labels)
plt.boxplot([bins[f'VIC{i+1}']['ele'] for i in range(num_bins)] + [bins[f'VIC{num_bins+1}']['ele']],
medianprops={'color': 'blue', 'linewidth': '1.5'},
patch_artist=False,
meanline=True,
showmeans=True,
meanprops={'color': 'red', 'ls': '--', 'linewidth': '1.5'},
showfliers=False,
labels=labels)
plt.xticks(rotation=45, fontproperties='DejaVu Sans', fontsize=9)
plt.xlabel('Ele')
plt.ylabel('Velocity_2014-2022')
plt.show()