PCA组合图可视化,本文完全参考自 带统计学的PCoA,如有侵权,欢迎沟通。
加载R包
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
library(tidyverse)
library(patchwork)
library(vegan)
library(ape)
library(grid)
library(multcomp)
# rm(list = ls())
options(stringsAsFactors = F)
导入数据
data("iris")
data <- iris[, 1:4]
groups <- data.frame(rownames(iris), iris[, 5])
colnames(groups) <- c("V1", "V2")
head(groups)
绘图参数
length <- length(unique(as.character(groups$V1)))
times1 <- length%/%8
res1 <- length%%8
times2 <- length%/%5
res2 <- length%%5
col1 <- rep(1:8, times1)
col <- c(col1, 1:res1)
pich1 <- rep(c(21:24), times2)
pich <- c(pich1, 15:(15+res2))
cbbPalette <- c("#B2182B", "#E69F00", "#56B4E9",
"#009E73", "#F0E442", "#0072B2",
"#D55E00", "#CC79A7", "#CC6666",
"#9999CC", "#66CC99", "#ADD1E5")
PCoA
data <- vegdist(data)
pcoa <- pcoa(data, correction = "none", rn = NULL)
PC1 <- pcoa$vectors[, 1]
PC2 <- pcoa$vectors[, 2]
plotdata <- data.frame(rownames(pcoa$vectors), PC1, PC2, groups$V2)
colnames(plotdata) <-c("sample","PC1","PC2","Group")
pc1 <- floor(pcoa$values$Relative_eig[1] * 100)
pc2 <- floor(pcoa$values$Relative_eig[2] * 100)
plotdata$Group <- factor(plotdata$Group, levels = c("setosa", "versicolor", "virginica"))
head(plotdata)
PC1和PC2的显著性检验
yf <- plotdata
yd1 <- yf %>% group_by(Group) %>% summarise(Max = max(PC1))
yd2 <- yf %>% group_by(Group) %>% summarise(Max = max(PC2))
yd1$Max <- yd1$Max + max(yd1$Max) * 0.1
yd2$Max <- yd2$Max + max(yd2$Max) * 0.1
fit1 <- aov(PC1 ~ Group, data = plotdata)
tuk1 <- multcomp::glht(fit1, linfct = mcp(Group = "Tukey"))
res1 <- multcomp::cld(tuk1, alpah = 0.05)
fit2 <- aov(PC2 ~ Group, data = plotdata)
tuk2 <- multcomp::glht(fit2, linfct = multcomp::mcp(Group = "Tukey"))
res2 <- cld(tuk2, alpah = 0.05)
test <- data.frame(PC1 = res1$mcletters$Letters,
PC2 = res2$mcletters$Letters,
yd1 = yd1$Max,
yd2 = yd2$Max,
Group = yd1$Group)
test$Group <- factor(test$Group, levels = c("setosa", "versicolor", "virginica"))
head(test)
相须图绘制
特别强调,一定要先画上方和右侧的相须图!!!
这里有一个细节,就是因为相须图是添加了差异检验字母的,就会导致相须图和PCoA散点图的坐标轴范围不一致,如果直接合并的话会导致图像扭曲,箱子无法准确对应PCoA中点的分布。
所以一定要先画相须图,然后在后面PCoA图的绘制过程中调用两个相须图的坐标轴范围,以达到4个图的完美匹配。
p1 <- ggplot(plotdata, aes(Group, PC1)) +
geom_boxplot(aes(fill = Group)) +
geom_text(data = test, aes(x = Group, y = yd1, label = PC1),
size = 7, color = "black", fontface = "bold") +
coord_flip() +
scale_fill_manual(values = cbbPalette) +
theme_bw() +
theme(axis.ticks.length = unit(0.4, "lines"),
axis.ticks = element_line(color = 'black'),
axis.line = element_line(color = "black"),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(color = 'black', size = 20, face = "bold"),
axis.text.x = element_blank(),
legend.position = "none")
p3 <- ggplot(plotdata, aes(Group, PC2)) +
geom_boxplot(aes(fill = Group)) +
geom_text(data = test, aes(x = Group, y = yd2, label = PC2),
size = 7, color = "black", fontface = "bold") +
scale_fill_manual(values = cbbPalette) +
theme_bw() +
theme(axis.ticks.length = unit(0.4, "lines"),
axis.ticks = element_line(color = 'black'),
axis.line = element_line(color = "black"),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(color = 'black', size = 20, angle = 45,
vjust = 1, hjust = 1, face = "bold"),
axis.text.y = element_blank(),
legend.position = "none")
p1 / p3
PCoA结果图绘制
p2 <- ggplot(plotdata, aes(PC1, PC2)) +
geom_point(aes(fill = Group), size = 8, pch = 21)+
scale_fill_manual(values = cbbPalette, name = "Group")+
xlab(paste("PC1 ( ", pc1, "%"," )", sep = "")) +
ylab(paste("PC2 ( ", pc2, "%"," )", sep = "")) +
xlim(ggplot_build(p1)$layout$panel_scales_y[[1]]$range$range) +
ylim(ggplot_build(p3)$layout$panel_scales_y[[1]]$range$range) +
theme(text=element_text(size = 30)) +
geom_vline(aes(xintercept = 0), linetype = "dotted") +
geom_hline(aes(yintercept = 0), linetype = "dotted") +
guides(fill = guide_legend(ncol = 1)) +
theme(panel.background = element_rect(fill = 'white', color = 'black'),
panel.grid = element_blank(),
axis.title = element_text(color = 'black', size = 34),
axis.ticks.length = unit(0.4,"lines"),
axis.ticks = element_line(color = 'black'),
axis.line = element_line(color = "black"),
axis.title.x=element_text(color = 'black', size = 34, vjust = 7),
axis.title.y=element_text(color = 'black', size = 34, vjust = -2),
axis.text=element_text(color = 'black', size = 28),
legend.title=element_text(size = 24, face = "bold"),
legend.text=element_text(size = 20),
legend.key=element_blank(),
legend.position = c(0.88, 0.13),
legend.background = element_rect(color = "black"),
legend.key.height=unit(1, "cm"))
p2
PERMANOVA分析
otu.adonis <- adonis(data ~ V2, data = groups, distance = "bray")
p4 <- ggplot(plotdata, aes(PC1, PC2)) +
geom_text(aes(x = -0.5, y = 0.6,
label = paste("PERMANOVA:\ndf = ",
otu.adonis$aov.tab$Df[1],
"\nR2 = ",
round(otu.adonis$aov.tab$R2[1], 4),
"\np-value = ",
otu.adonis$aov.tab$`Pr(>F)`[1],
sep = "")),
size = 7) +
theme_bw() +
xlab("") + ylab("") +
theme(panel.grid = element_blank(),
axis.title = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank())
p4
图像拼接
p5 <- p1 + p4 + p2 + p3 +
plot_layout(heights = c(1, 4),
widths = c(4, 1),
ncol = 2,
nrow = 2)
p5
系统信息
devtools::session_info()