R语言绘制PCoA图

前言

主坐标分析(principal co-ordinates analysis,PCoA)是一种非约束性的数据降维分析方法,可用来研究样本群落组成的相似性或相异性。PCoA以样本距离为整体考虑,应用非常广泛。这期,我们用R语言ggplot2来实现PCoA可视化。

引用

https://zhuanlan.zhihu.com/p/389494756 (在线作图丨数据降维方法②——主坐标分析PCoA)
https://www.bilibili.com/video/BV1mU4y1m74B (R语言保姆级教程/PCoA/参数自定义)
还有其他,时间略久,找不到具体网址了…


第一步:加载安装包

library(ade4)
library(ggplot2)
library(RColorBrewer)
library(vegan)
library(plyr)

第二步:导入数据

data('iris')       # R语言自带数据集
data <- iris
head(data)

在这里插入图片描述

第三步:PCoA分析

data_new <- data[,-5]          # 先去掉最后一列的species
data_dist <- vegdist(data_new,method='bray')                     # 计算距离
data_pcoa <- cmdscale(data_dist,k=(nrow(data_new)-1),eig=TRUE)   # PCoA

在这里插入图片描述
看到warning message,所以要矫正负特征根。

data_pcoa <- cmdscale(data_dist,k=(nrow(data_new)-1),eig=TRUE,add=TRUE)  # 矫正负特征根
data_pcoa_eig <- data_pcoa$eig                     # 各PCoA轴特征值
data_pcoa_exp <- data_pcoa_eig/sum(data_pcoa_eig)  # 各PCoA轴解释量
pcoa1 <- paste(round(100*data_pcoa_exp[1],2),'%')  # 前2个轴的解释量
pcoa2 <- paste(round(100*data_pcoa_exp[2],2),'%')  # PCoA1:40.38%; PCoA2:6.9%

sample_site <- data.frame(data_pcoa$points)[1:2]
sample_site$level<-factor(data[,5],levels = c('setosa','versicolor','virginica'))
names(sample_site)[1:3] <- c('PCoA1', 'PCoA2','level')
head(sample_site)
summary(sample_site)

在这里插入图片描述
在计算出前2个轴的解释量之后,提取data_pcoa$points前2个轴的值,作为坐标点,为画PCoA图做准备。通过summary可知,目前的数据集sample_site共有150条数据,其中level共有3种水平,分别50条数据。

第四步:PCoA作图

有时候作图,碰到的分组情况是Group1、Group2、Group3这种按顺序排好的组别,但是图画出来后的图例顺序却是乱的(如Group1、Group3、Group2这种),这种时候就需要先给定好顺序。

level_order <- c("setosa","versicolor","virginica")
level_order <- factor(1:length(level_order),labels = level_order)
sample_site$level <- factor(sample_site$level,levels = levels(level_order))

PCoA作图需要准备的步骤:

find_hull <- function(sample_site) sample_site[chull(sample_site$PCoA1, sample_site$PCoA2),]
hulls <- ddply(sample_site, "level", find_hull)
hulls[,1]

PCoA1_mean <- tapply(sample_site$PCoA1,sample_site$level, mean)
PCoA2_mean <- tapply(sample_site$PCoA2,sample_site$level, mean)

mean_point <- rbind(PCoA1_mean,PCoA2_mean)
mean_point <- data.frame(mean_point)
t1 <- t(mean_point)
t2 <- as.data.frame(t1)
t2
pcoa_plot <- ggplot(sample_site, aes(PCoA1, PCoA2, color=level,shape=level)) +
  theme_classic()+        #去掉背景框
  geom_vline(xintercept = 0, color = 'gray', size = 0.4) +
  geom_hline(yintercept = 0, color = 'gray', size = 0.4) +
  geom_polygon(data = hulls,alpha = 0.2,aes(fill=factor(level),color=level),size=0.2,show.legend = FALSE) +
  geom_point(size = 4) +  #设置点的透明度、大小
  geom_point(aes(t2[1,1], t2[1,2]),size = 0.5, color='royalblue4') +
  geom_point(aes(t2[2,1], t2[2,2]),size = 0.5, color='brown2') +
  geom_point(aes(t2[3,1], t2[3,2]),size = 0.5, color='seagreen4') +
  geom_segment(aes(x=PCoA1,xend=rep(t2[,1],each=50),y=PCoA2,yend=rep(t2[,2],each=50)),size=0.2) +
  scale_color_manual(values = c('royalblue4','brown2','seagreen4')) +
  scale_fill_manual(values = c('royalblue4','brown2','seagreen4')) +
  scale_shape_manual(values = c(20,20,20)) +
  scale_x_continuous(limits = c(-0.4,0.3),breaks=round(seq(-0.4,0.3,0.1),2)) +
  scale_y_continuous(limits = c(-0.25,0.25),breaks=round(seq(-0.25,0.25,0.1),2)) +
  theme(panel.grid = element_line(color = 'black', linetype = 1, size = 0.1),
        panel.background = element_rect(color = 'black', fill = 'transparent'),
        legend.title=element_blank()
  )+labs(x = paste('PCoA1[40.38%]'), y = paste('PCoA2[6.90%]'))

在这里插入图片描述

第五步:保存图片

png(filename = 'pcoa_plot.png',width = 2500, height = 2000,res = 300)
pcoa_plot
dev.off()

到此,PCoA图已经画好。没有加置信椭圆,直接以最外围的点连接起来作为最外面边界,并将每一组的中心点位置求出来,使得每一个点都与中心点连接,形成了该PCoA图。

全部代码:

# PCoA 主坐标分析
# PCoA与PCA都是降低数据维度的方法
# 差异在于PCA是基于原始矩阵,而PCoA是基于通过原始矩阵计算出的距离矩阵

## 第一步:加载安装包
library(ade4)
library(ggplot2)
library(RColorBrewer)
library(vegan)
library(plyr)

## 第二步:导入数据
data('iris')         # R语言自带数据集
data <- iris
head(data)

## 第三步:PCoA分析
data_new <- data[,-5]
data_dist <- vegdist(data_new,method='bray')       # 计算距离
data_pcoa <- cmdscale(data_dist,k=(nrow(data_new)-1),eig=TRUE)   # PCoA
data_pcoa <- cmdscale(data_dist,k=(nrow(data_new)-1),eig=TRUE,add=TRUE)  # 矫正负特征根
data_pcoa_eig <- data_pcoa$eig     # 各PCoA轴特征值
data_pcoa_exp <- data_pcoa_eig/sum(data_pcoa_eig)  # 各PCoA轴解释量
pcoa1 <- paste(round(100*data_pcoa_exp[1],2),'%')
pcoa2 <- paste(round(100*data_pcoa_exp[2],2),'%')
sample_site <- data.frame(data_pcoa$points)[1:2]
sample_site$level<-factor(data[,5],levels = c('setosa','versicolor','virginica'))
names(sample_site)[1:3] <- c('PCoA1', 'PCoA2','level')
head(sample_site)
summary(sample_site)

## 第四步:PCoA作图
level_order <- c("setosa","versicolor","virginica")    # 确定level顺序
level_order <- factor(1:length(level_order),labels = level_order)
sample_site$level <- factor(sample_site$level,levels = levels(level_order))

find_hull <- function(sample_site) sample_site[chull(sample_site$PCoA1, sample_site$PCoA2),]
hulls <- ddply(sample_site, "level", find_hull)
hulls[,1]

PCoA1_mean <- tapply(sample_site$PCoA1,sample_site$level, mean)
PCoA2_mean <- tapply(sample_site$PCoA2,sample_site$level, mean)

mean_point <- rbind(PCoA1_mean,PCoA2_mean)
mean_point <- data.frame(mean_point)
t1 <- t(mean_point)
t2 <- as.data.frame(t1)
t2

pcoa_plot <- ggplot(sample_site, aes(PCoA1, PCoA2, color=level,shape=level)) +
  theme_classic()+        #去掉背景框
  geom_vline(xintercept = 0, color = 'gray', size = 0.4) +
  geom_hline(yintercept = 0, color = 'gray', size = 0.4) +
  geom_polygon(data = hulls,alpha = 0.2,aes(fill=factor(level),color=level),size=0.2,show.legend = FALSE) +
  geom_point(size = 4) +  #设置改点的透明度、大小
  geom_point(aes(t2[1,1], t2[1,2]),size = 0.5, color='royalblue4') +
  geom_point(aes(t2[2,1], t2[2,2]),size = 0.5, color='brown2') +
  geom_point(aes(t2[3,1], t2[3,2]),size = 0.5, color='seagreen4') +
  geom_segment(aes(x=PCoA1,xend=rep(t2[,1],each=50),y=PCoA2,yend=rep(t2[,2],each=50)),size=0.2) +
  scale_color_manual(values = c('royalblue4','brown2','seagreen4')) +
  scale_fill_manual(values = c('royalblue4','brown2','seagreen4')) +
  scale_shape_manual(values = c(20,20,20)) +
  scale_x_continuous(limits = c(-0.4,0.3),breaks=round(seq(-0.4,0.3,0.1),2)) +
  scale_y_continuous(limits = c(-0.25,0.25),breaks=round(seq(-0.25,0.25,0.1),2)) +
  theme(panel.grid = element_line(color = 'black', linetype = 1, size = 0.1),
        panel.background = element_rect(color = 'black', fill = 'transparent'),
        legend.title=element_blank()
  )+labs(x = paste('PCoA1[40.38%]'), y = paste('PCoA2[6.90%]'))
pcoa_plot

png(filename = 'pcoa_plot.png',width = 2500, height = 2000,res = 300)
pcoa_plot
dev.off()

  • 2
    点赞
  • 25
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值