酷不酷炫!想不想学!带统计学的PCoA完美解决打样本量多组数据不好区分的问题!!-腾讯云开发者社区-腾讯云
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from skbio import DistanceMatrix
from skbio.stats.ordination import pcoa
from skbio.stats.distance import permanova
from scipy.stats import mannwhitneyu
import seaborn as sns
import pandas as pd
from scipy.spatial.distance import squareform
from matplotlib.colors import ListedColormap
from sklearn.linear_model import LinearRegression
from matplotlib.colors import Normalize
from matplotlib.cm import ScalarMappable
data = pd.read_csv('all.otus.abu.txt', delimiter='\t', header=0, index_col=0)
group_data = pd.read_csv('分组.txt', delimiter='\t', index_col=0)
array_data = data.values.T
# 计算样本间的 Bray-Curtis 距离矩阵
distance_matrix_data = np.zeros((len(array_data), len(array_data)))
for i in range(len(array_data)):
for j in range(len(array_data)):
distance_matrix_data[i, j] = np.abs(array_data[i] - array_data[j]).sum() / np.abs(array_data[i] + array_data[j]).sum()
# 将距离矩阵转换为DistanceMatrix对象
distance_matrix = DistanceMatrix(distance_matrix_data)
# PCoA分析
pcoa_results = pcoa(distance_matrix)
pc1, pc2 = pcoa_results.samples['PC1'], pcoa_results.samples['PC2']
# 分组信息
grouping = group_data.values.T[0]
groups = list(np.unique(grouping))
values =[groups.index(name) for name in grouping]
dic_pc1={}
for g in groups: dic_pc1[g]=[]
for i in range(len(pc1)): dic_pc1[grouping[i]].append(pc1[i])
dic_pc2={}
for g in groups: dic_pc2[g]=[]
for i in range(len(pc2)): dic_pc2[grouping[i]].append(pc2[i])
# PERMANOVA结果
perm_results = permanova(distance_matrix, grouping)
# 计算自由度
df = len(pc1) - 2
# 创建线性回归模型并进行拟合,计算 R²
model = LinearRegression().fit(np.array(pc1).reshape(-1, 1), pc2)
r2 = model.score(np.array(pc1).reshape(-1, 1), pc2)
# 设置图形布局
fig = plt.figure(figsize=(10, 10))
gs = GridSpec(4, 4, figure=fig)
# 主散点图
main_ax = fig.add_subplot(gs[1:4, 0:3])
main_ax.set_xlabel('PC1')
main_ax.set_ylabel('PC2')
scatter = main_ax.scatter(pc1, pc2, c=values, cmap='jet')
main_ax.legend(handles=scatter.legend_elements()[0], labels=groups, title="Group", loc='lower right')
result_text = f"PERMANOVA\n df={df}\nR² = {r2:.4f}\np-value = {perm_results['p-value']:.4f}"
ax_res = fig.add_subplot(gs[0, 3], sharey=main_ax)
ax_res.text(0.95, 0.95, result_text, transform=ax_res.transAxes, ha='right', va='top')
ax_res.xaxis.set_visible(False)
ax_res.yaxis.set_visible(False)
cmap = plt.get_cmap('jet')
colors = cmap(np.linspace(0, 1, len(groups)))
color_dict = {label: color for label, color in zip(groups, colors)}
# 上侧箱型图
top_ax = fig.add_subplot(gs[0, :-1], sharex=main_ax)
df_pc1 = pd.DataFrame.from_dict(dic_pc1, orient='index').T
sns.boxplot(data=df_pc1, ax=top_ax, orient='h', palette=color_dict)
top_ax.set(xlabel='')
# 右侧箱型图
right_ax = fig.add_subplot(gs[1:, -1], sharey=main_ax)
df_pc2= pd.DataFrame.from_dict(dic_pc2, orient='index').T
sns.boxplot(data=df_pc2, ax=right_ax, orient='v', palette=color_dict)
plt.setp(right_ax.get_xticklabels(), rotation=45)
right_ax.set(ylabel='')
plt.tight_layout()
plt.show()