怎么在堆叠柱状图中体现百分比_微生物门类堆叠柱状图一文解决

写在前面

无论是堆叠柱状图,还是近年来在其基础上扩展出来的冲击图,基本都只能对门水平物种多样性进行可视化。然而即使是门水平,也不一定全部样本都适合使用堆叠柱状图可视化。

尤其是土壤等复杂的微生物群落的环境,往往门水平的物种数量也在50个左右,或者有七八十个也是常见的。而堆叠柱状图一般可以展示的也就10个左右。过多无论是图例,还是颜色,都无法进行很好的安排和调配。

所以我们一般情况下展示的,都是主要的门类,也确实10个左右的门类基本代表了大部分的微生物。但在分析过程中却存在较大问题。

当我们选择这些主要的门类的时候存在两种选择:

  • 对全部门类做相对丰度转化,并提取主要的门类物种展示,但必然堆叠柱状图的纵坐标达不到100%。

    这样做丰度评估当然是十分准确的,但却不好看。

  • 还有一个选择,我们对全部otu表格标准化后,提取主要的微生物门类,得到门类信息的sub表格后,再次标准化,然后就可以达到100%效果的y轴。

    但是这样对主要微生物门类的丰度存在不同程度的变形,也就是说门类丰度展现的不够准确了。

    所以不能删减微生物门类,只能通过合并低丰度微生物门类来达到效果,方可准确无误地展示丰度。

实战

第一种方式 y轴丰度参差不齐

第一种方式直接参考这里

微生信生物新年放大招:一条代码完成堆叠柱状图-冲击图的操作-终结版

cb2251e38ffc5963ed69e8abbc3053a5.png

第二种方式 重新标准化 Y轴统一100%

我们对待分析是认真的,所以也构建了一个函数用于完成这一操作,参见文章末尾。

选择Top10:也就是相对丰度排名前十位的门水平物种展示,其余全部合并并作为others。所以一共展示了11种。

# Example: load a serialized object from disk and plot the ten most abundant
# phyla, with every remaining phylum merged into a single "others" class.
ps <- readRDS("../data//ps_liu.rds")
result <- barMainplot(
  ps = ps,
  j = "Phylum",
  rep = 6,
  axis_ord = NULL,
  label = FALSE,
  sd = FALSE,
  Top = 10
)
# Show the first and second elements of the returned list.
result[[1]]
result[[2]]

e01753da8db2f64a642c67705a801a45.png

欢迎加入微生信生物

5cd69331ad515dd1f2af04de923f8d15.png

快来微生信生物

微生信生物

函数

#' Stacked bar chart and alluvial plot of the most abundant taxa
#'
#' Agglomerates a phyloseq object at taxonomic rank `j`, keeps the `Top` most
#' abundant taxa, relabels every other taxon as "others" (so nothing is
#' dropped and the bars still add up to the full abundance), averages the
#' abundance per group and draws a stacked bar chart plus an alluvial plot.
#'
#' @param otu,tax,map Optional OTU table (taxa as rows), taxonomy table and
#'   sample metadata. When any of them is supplied all three are required and
#'   a new phyloseq object is built from them, taking precedence over `ps`.
#' @param ps A phyloseq object, used when `otu`, `tax` and `map` are all NULL.
#' @param j Name of the taxonomic rank to agglomerate on and to plot.
#' @param group Name of the sample metadata column holding the grouping.
#' @param rep Number of samples per group. Every abundance is divided by this
#'   value before being summed within a group, so the per-group value is a
#'   mean only when every group really contains `rep` samples.
#' @param axis_ord Optional x-axis order given as one "A-B-C" string.
#' @param label If TRUE, write taxon names on segments taller than 3 (%).
#' @param sd If TRUE, add error bars (cumulative height +/- `sd` column).
#' @param Top Number of most abundant taxa shown individually.
#' @param tran If TRUE, convert counts to per-sample proportions first.
#' @return An unnamed list: the stacked bar chart, the plotting data frame
#'   (columns aa, group, Abundance, sd, label_sd, label_y, label) and the
#'   alluvial plot.
barMainplot <- function(otu = NULL, tax = NULL, map = NULL, ps = NULL,
                        j = "Phylum", group = "Group", rep = 6,
                        axis_ord = NULL, label = TRUE, sd = FALSE,
                        Top = 10, tran = TRUE) {
  # The packages stay attached exactly as before: the plotting code below uses
  # unqualified ggplot2 / ggalluvial names and callers may rely on the attach.
  # plyr is attached after dplyr and masks arrange/summarise/desc, which is why
  # every such call below is namespace-qualified.
  library(phyloseq)
  library(tidyverse)
  library(vegan)
  library(reshape2)
  library("plyr")
  library(ggalluvial)
  library(ggplot2)

  # NULL means "keep the default x order". The previous NA sentinel was tested
  # with if (is.na(axis_order)), which is an error in R >= 4.2 as soon as a
  # real order (length > 1) is supplied.
  axis_order <- NULL
  if (!is.null(axis_ord)) {
    axis_order <- strsplit(basename(axis_ord), "-")[[1]]
  }

  if (is.null(otu) && is.null(tax) && is.null(map)) {
    if (is.null(ps)) {
      stop("Supply either `ps` or all of `otu`, `tax` and `map`.",
           call. = FALSE)
    }
    meta <- methods::as(phyloseq::sample_data(ps), "data.frame")
    if (!group %in% colnames(meta)) {
      stop("Column '", group, "' not found in sample_data(ps).", call. = FALSE)
    }
    # Only the grouping column is kept, stored under the fixed name "Group".
    meta <- data.frame(Group = as.factor(meta[[group]]),
                       row.names = rownames(meta))
    phyloseq::sample_data(ps) <- phyloseq::sample_data(meta)
  } else {
    if (is.null(otu) || is.null(tax) || is.null(map)) {
      stop("`otu`, `tax` and `map` must be supplied together.", call. = FALSE)
    }
    if (!group %in% colnames(map)) {
      stop("Column '", group, "' not found in `map`.", call. = FALSE)
    }
    # Copy the grouping column by exact name; the old gsub() rename treated
    # `group` as a regex and could rewrite unrelated column names.
    map$Group <- as.factor(map[[group]])
    ps <- phyloseq::phyloseq(
      phyloseq::otu_table(as.matrix(otu), taxa_are_rows = TRUE),
      phyloseq::sample_data(map),
      phyloseq::tax_table(as.matrix(tax))
    )
  }

  # Merge all taxa sharing the same name at rank j.
  psdata <- phyloseq::tax_glom(ps, taxrank = j)

  if (tran == TRUE) {
    psdata <- phyloseq::transform_sample_counts(psdata, function(x) {
      x / sum(x)
    })
  }

  # Keep the names of the Top most abundant taxa and relabel the rest as
  # "others". taxa_sums() is independent of the OTU table orientation and
  # head() cannot index past the end when fewer than Top taxa exist.
  taxa_totals <- phyloseq::taxa_sums(psdata)
  top_taxa <- utils::head(names(sort(taxa_totals, decreasing = TRUE)), Top)
  tax_mat <- methods::as(phyloseq::tax_table(psdata), "matrix")
  tax_mat[!(rownames(tax_mat) %in% top_taxa), j] <- "others"
  phyloseq::tax_table(psdata) <- phyloseq::tax_table(tax_mat)

  # Long format: one row per taxon x sample.
  Taxonomies <- phyloseq::psmelt(psdata)

  # One fill colour per distinct class that will appear in the legend.
  colbar <- length(unique(Taxonomies[[j]]))
  fill_colors <- grDevices::colorRampPalette(c(
    "#CBD588", "#599861", "orange", "#DA5724", "#508578", "#CD9BCD",
    "#AD6F3B", "#673770", "#D14285", "#652926", "#C84248",
    "#8569D5", "#5E738F", "#D1A33D", "#8A7C64", "black"
  ))(colbar)

  # Percent scale, pre-divided by the replicate count so that the per-group
  # sum computed below is a per-group mean.
  Taxonomies$Abundance <- Taxonomies$Abundance * 100
  Taxonomies$Abundance <- Taxonomies$Abundance / rep

  # The rank column is called "aa" from here on (exact match, not a regex).
  colnames(Taxonomies)[colnames(Taxonomies) == j] <- "aa"

  # Per taxon x group: summed abundance and standard deviation. The summary
  # columns are left unnamed and renamed by position so that sd() still sees
  # the raw Abundance column rather than the freshly computed sum.
  group_means <- dplyr::summarise(
    dplyr::group_by(Taxonomies, aa, Group),
    sum(Abundance),
    stats::sd(Abundance)
  )
  group_means <- as.data.frame(dplyr::ungroup(group_means))
  colnames(group_means) <- c("aa", "group", "Abundance", "sd")

  # Overall abundance per taxon, most abundant first; defines the factor
  # level order (and therefore stacking and legend order).
  cc <- as.data.frame(dplyr::summarise(
    dplyr::group_by(Taxonomies, aa),
    sum(Abundance)
  ))
  colnames(cc) <- c("aa", "allsum")
  cc <- cc[order(-cc$allsum), , drop = FALSE]
  taxon_levels <- as.character(cc$aa)

  group_means$aa <- factor(group_means$aa, ordered = TRUE,
                           levels = taxon_levels)
  # Last level first, so the cumulative sums below run in that order.
  group_means <- plyr::arrange(group_means, plyr::desc(aa))

  # label_sd: top of each segment (error-bar anchor); label_y: its midpoint.
  Taxonomies_x <- plyr::ddply(
    group_means, "group", transform,
    label_sd = cumsum(Abundance),
    label_y = cumsum(Abundance) - 0.5 * Abundance
  )

  # Only segments taller than 3 (%) get a text label. The previous loop
  # indexed column 5, which is label_sd, so it blanked the error-bar anchor
  # and never blanked a label.
  Taxonomies_x$label <- Taxonomies_x$aa
  Taxonomies_x$label[which(Taxonomies_x$Abundance <= 3)] <- NA

  Taxonomies_x$aa <- factor(Taxonomies_x$aa, ordered = TRUE,
                            levels = taxon_levels)

  n_groups <- length(unique(phyloseq::sample_data(ps)$Group))

  # Theme shared by both figures.
  shared_theme <- theme_bw() +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      text = element_text(face = "bold"),
      plot.title = element_text(vjust = -8.5, hjust = 0.1),
      axis.title.y = element_text(size = 20, face = "bold", colour = "black"),
      axis.title.x = element_text(size = 24, face = "bold", colour = "black"),
      axis.text = element_text(size = 20, face = "bold"),
      axis.text.x = element_text(colour = "black", size = 14),
      axis.text.y = element_text(colour = "black", size = 14)
    )

  # ---- stacked bar chart ----
  p4 <- ggplot(Taxonomies_x, aes(x = group, y = Abundance, fill = aa)) +
    geom_bar(stat = "identity", width = 0.5, color = "black") +
    scale_fill_manual(values = fill_colors)

  if (!is.null(axis_order)) {
    p4 <- p4 + scale_x_discrete(limits = axis_order)
  }
  if (sd == TRUE) {
    p4 <- p4 +
      geom_errorbar(aes(ymin = label_sd - sd, ymax = label_sd + sd),
                    width = .2)
  }
  if (label == TRUE) {
    p4 <- p4 +
      geom_text(aes(y = label_y, label = label), size = 4,
                fontface = "bold.italic")
  }

  # A single y scale carries both the title and the zero expansion; two
  # separate scale_y_continuous() calls made the second replace the first
  # and the "Abundance (%)" title was lost.
  p4 <- p4 +
    shared_theme +
    scale_y_continuous(name = "Abundance (%)", expand = c(0, 0)) +
    theme(legend.text = element_text(size = 15))
  if (n_groups > 3) {
    p4 <- p4 + theme(axis.text.x = element_text(angle = 45, vjust = 1,
                                                hjust = 1))
  }

  # ---- alluvial plot ----
  # Rows sorted by taxon; every taxon gets one alluvium id so that its
  # segments are connected across groups.
  Taxonomies_x1 <- Taxonomies_x[order(Taxonomies_x$aa), , drop = FALSE]
  rownames(Taxonomies_x1) <- NULL
  Taxonomies_x1$ID <- factor(as.integer(Taxonomies_x1$aa))

  # NOTE(review): `weight` is kept as in the original mapping; newer
  # ggalluvial releases may expect `y` instead -- confirm against the
  # installed version.
  p3 <- ggplot(Taxonomies_x1,
               aes(x = group, stratum = aa, alluvium = ID,
                   weight = Abundance,
                   fill = aa, label = aa)) +
    geom_flow(stat = "alluvium", lode.guidance = "rightleft",
              color = "black", size = 0.2, width = 0.3, alpha = .2) +
    geom_bar(width = 0.45) +
    geom_stratum(width = 0.45, size = 0.2) +
    scale_fill_manual(values = fill_colors) +
    labs(x = "group",
         y = "Relative abundancce (%)")

  if (!is.null(axis_order)) {
    p3 <- p3 + scale_x_discrete(limits = axis_order)
  }
  if (label == TRUE) {
    p3 <- p3 +
      geom_text(aes(y = label_y, label = label), size = 4,
                fontface = "bold.italic")
  }
  if (sd == TRUE) {
    p3 <- p3 +
      geom_errorbar(aes(ymin = label_sd - sd, ymax = label_sd + sd),
                    width = .2)
  }

  p3 <- p3 +
    shared_theme +
    scale_y_continuous(expand = c(0, 0)) +
    theme(legend.text = element_text(size = 15, face = "bold.italic"))
  if (n_groups > 3) {
    p3 <- p3 + theme(axis.text.x = element_text(angle = 45, vjust = 1,
                                                hjust = 1))
  }

  list(p4, Taxonomies_x, p3)
}

添加主编微信 加入群聊

5cd69331ad515dd1f2af04de923f8d15.png

关于微生信生物 你想要的都在这里

微生信生物

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值