metacoder_相关物种分类树绘制
这个包学习分享的人不多,一方面应用的比较少,其次呢这是基于R6编程的产物,有一定的门槛,其实只要你学会S4类对象,就差不多理解了。例如phyloseq就是典型的S4类对象。
这里给出一个应用的案例:
metacoder 包学习
安装和导入R包
#--选择安装cran或者github中的R包
# if(!require(metacoder))install.packages("metacoder")
# if(!require(metacoder))devtools::install_github("grunwaldlab/metacoder")
#--导入R包,开始学习
library(metacoder)
#---构造自己的数据
library(ggClusterNet)
library(tidyverse)
library(phyloseq)
学习数据结构
metacoder包使用的数据是tibble格式的数据框,一个OTU表格,包含原始OTU的count,并未抽平。
data(ps)
tax = ps %>%
subset_taxa(Kingdom == "Bacteria") %>%
filter_OTU_ps(100) %>%
vegan_tax() %>%
as.data.frame()
otu = ps %>%
subset_taxa(Kingdom == "Bacteria") %>%
filter_OTU_ps(100) %>%
vegan_otu() %>% t() %>%
as.data.frame()
head(otu)
## KO1 KO2 KO3 KO4 KO5 KO6 OE1 OE2 OE3 OE4 OE5 OE6 WT1 WT2
## ASV_1 1113 1968 816 1372 1062 1087 1270 1637 1368 962 1247 1017 2345 2538
## ASV_28 253 303 54 439 132 182 154 263 153 257 242 178 391 450
## ASV_8 508 504 608 424 190 327 335 535 1578 780 507 516 634 763
## ASV_16 231 248 521 270 142 215 160 239 598 283 227 179 426 288
## ASV_2 1922 1227 2355 2218 2885 1817 640 494 1218 1264 945 635 1280 1493
## ASV_79 102 117 102 95 135 161 46 38 51 63 63 66 55 59
## WT3 WT4 WT5 WT6
## ASV_1 1722 2004 1439 1558
## ASV_28 235 321 449 242
## ASV_8 553 1053 457 514
## ASV_16 243 314 819 351
## ASV_2 839 1115 1489 1170
## ASV_79 62 93 72 64
fun1 = function(x) {
str_c(x, collapse = ";")
}
dat <- apply(tax,1, fun1) %>% as.data.frame()
head(dat)
## .
## ASV_1 Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Thermomonosporaceae;Unassigned;Unassigned
## ASV_28 Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Thermomonosporaceae;Actinocorallia;Unassigned
## ASV_8 Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Streptomycetaceae;Streptomyces;Unassigned
## ASV_16 Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Streptomycetaceae;Streptomyces;Streptomyces_ederensis
## ASV_2 Bacteria;Proteobacteria;Betaproteobacteria;Burkholderiales;Comamonadaceae;Pelomonas;Pelomonas_puraquae
## ASV_79 Bacteria;Proteobacteria;Betaproteobacteria;Burkholderiales;Unassigned;Unassigned;Unassigned
dat$otu_id = row.names(dat)
colnames(dat)[1] = c("lineage")
dat = dat %>% select(otu_id,lineage)
dat2 = cbind(dat,otu)
head(dat2)
## otu_id
## ASV_1 ASV_1
## ASV_28 ASV_28
## ASV_8 ASV_8
## ASV_16 ASV_16
## ASV_2 ASV_2
## ASV_79 ASV_79
## lineage
## ASV_1 Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Thermomonosporaceae;Unassigned;Unassigned
## ASV_28 Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Thermomonosporaceae;Actinocorallia;Unassigned
## ASV_8 Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Streptomycetaceae;Streptomyces;Unassigned
## ASV_16 Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Streptomycetaceae;Streptomyces;Streptomyces_ederensis
## ASV_2 Bacteria;Proteobacteria;Betaproteobacteria;Burkholderiales;Comamonadaceae;Pelomonas;Pelomonas_puraquae
## ASV_79 Bacteria;Proteobacteria;Betaproteobacteria;Burkholderiales;Unassigned;Unassigned;Unassigned
## KO1 KO2 KO3 KO4 KO5 KO6 OE1 OE2 OE3 OE4 OE5 OE6 WT1 WT2
## ASV_1 1113 1968 816 1372 1062 1087 1270 1637 1368 962 1247 1017 2345 2538
## ASV_28 253 303 54 439 132 182 154 263 153 257 242 178 391 450
## ASV_8 508 504 608 424 190 327 335 535 1578 780 507 516 634 763
## ASV_16 231 248 521 270 142 215 160 239 598 283 227 179 426 288
## ASV_2 1922 1227 2355 2218 2885 1817 640 494 1218 1264 945 635 1280 1493
## ASV_79 102 117 102 95 135 161 46 38 51 63 63 66 55 59
## WT3 WT4 WT5 WT6
## ASV_1 1722 2004 1439 1558
## ASV_28 235 321 449 242
## ASV_8 553 1053 457 514
## ASV_16 243 314 819 351
## ASV_2 839 1115 1489 1170
## ASV_79 62 93 72 64
map = sample_data(ps)
使用的是R6 数据格式。
obj <- parse_tax_data(dat2,
class_cols = "lineage", # 物种注释列,包含七级注释
class_sep = ";" #不同分类级别之间的分割符号
)
names(obj)
## [1] ".__enclos_env__" "funcs" "data"
## [4] "input_ids" "edge_list" "taxa"
## [7] "clone" "get_dataset" "get_data_taxon_ids"
## [10] "n_obs_1" "n_obs" "sample_frac_obs"
## [13] "sample_n_obs" "arrange_obs" "transmute_obs"
## [16] "mutate_obs" "select_obs" "filter_obs"
## [19] "obs_apply" "obs" "all_names"
## [22] "is_taxon_id" "print" "initialize"
## [25] "print_tree" "taxonomy_table" "remove_redundant_names"
## [28] "replace_taxon_ids" "map_data_" "map_data"
## [31] "sample_frac_taxa" "sample_n_taxa" "arrange_taxa"
## [34] "filter_taxa" "is_internode" "is_branch"
## [37] "is_stem" "is_leaf" "is_root"
## [40] "n_leaves_1" "n_leaves" "n_subtaxa_1"
## [43] "n_subtaxa" "n_supertaxa_1" "n_supertaxa"
## [46] "id_classifications" "classifications" "subtaxa_apply"
## [49] "subtaxa" "internodes" "branches"
## [52] "leaves_apply" "leaves" "stems"
## [55] "roots" "supertaxa_apply" "supertaxa"
## [58] "data_used" "get_data_frame" "get_data"
## [61] "names_used" "taxon_indexes" "set_taxon_auths"
## [64] "set_taxon_ranks" "taxon_ranks" "set_taxon_names"
## [67] "taxon_names" "taxon_ids"
obj$data$tax_data <- zero_low_counts(obj, dataset = "tax_data", min_count = 200)#少于5reads的数据可能是由于测序错误引入的,这里将它们归零
no_reads <- rowSums(obj$data$tax_data[, row.names(map)]) == 0 #由于小于5的reads被归零了,所以有些OTU可能全是0值,统计一下
sum(no_reads)
## [1] 27
obj <- filter_obs(obj, target = "tax_data", ! no_reads, drop_taxa = TRUE)#过滤掉noreads的OTU
#drop_taxa = TRUE选项会导致在过滤后没有分配OTUs的taxa被删除
print(obj)
## <Taxmap>
## 184 taxa: ab. Bacteria ... hc. Sandaracinus_amylolyticus
## 184 edges: NA->ab, ab->ac, ab->ad ... el->ha, el->hb, em->hc
## 1 data sets:
## tax_data:
## # A tibble: 100 x 19
## taxon_id KO1 KO2 KO3 KO4 KO5 KO6 OE1 OE2 OE3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 en 1113 1968 816 1372 1062 1087 1270 1637 1368
## 2 eo 253 303 0 439 0 0 0 263 0
## 3 ep 508 504 608 424 0 327 335 535 1578
## # ... with 97 more rows, and 9 more variables: OE4 <dbl>,
## # OE5 <dbl>, OE6 <dbl>, WT1 <dbl>, WT2 <dbl>, WT3 <dbl>,
## # WT4 <dbl>, WT5 <dbl>, WT6 <dbl>
## 0 functions:
obj$data$tax_data <- calc_obs_props(obj, "tax_data")#抽平OTU,消除测序深度偏差带来的影响
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data",
cols = row.names(map))#用抽平的OTU计算tax的丰度
## 分组统计微生物出现的次数
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups =map$Group, cols = row.names(map))
set.seed(1) # 设置为1后 后面的图片随机性降低,保证图片重复性
heat_tree(obj,
node_label = taxon_names,
node_size = n_obs,
node_color = obj$data$tax_occ$KO, # 节点颜色按照KO的数值大小映射-连续型
node_size_axis_label = "OTU count",
node_color_axis_label = "Samples with reads",
layout = "davidson-harel",
initial_layout = "reingold-tilford")
set.seed(1)
#-----当然,我们可以简单粗暴的使用纯色填充
p <- heat_tree(obj,
node_label = obj$taxon_names(),
node_size = obj$n_obs(),
node_color = "red",
node_size_axis_label = "OTU count",
node_color_axis_label = "Samples with reads",
layout = "davidson-harel", # The primary layout algorithm
initial_layout = "reingold-tilford")
绘制默认树
x = parse_tax_data(dat2,
class_cols = "lineage", # 物种注释列,包含七级注释
class_sep = ";" #不同分类级别之间的分割符号
)
# 绘制默认树
heat_tree(x)
# x$taxon_names()
# 展示OTU的数量,用颜色和尺寸映射,每个节点映射的颜色和大小均代表这个节点下有包含的全部OTU
heat_tree(x, node_label = taxon_names, node_size = n_obs, node_color = n_obs)
# x$data$tax_data
# 统计并可视化测序深度-也就是物种丰度
x$data$taxon_counts <- calc_taxon_abund(x, data = "tax_data")
x$data$taxon_counts$total <- rowSums(x$data$taxon_counts[, -1]) # -1 = taxon_id column
heat_tree(x, node_label = taxon_names, node_size = total, node_color = total)
#-可以通过四个部分进行颜色和大小映射,但是最好不要都用上。下面是一个例子
# 边的映射为OTU在多少个样本中检测到。
x$data$n_samples <- calc_n_samples(x, data = "taxon_counts")
heat_tree(x, node_label = taxon_names, node_size = n_obs, node_color = total,
edge_color = n_samples)
选择多种可视化布局进行展示
#--选择不同展示方式
heat_tree(x, node_label = taxon_names, node_size = n_obs, node_color = n_obs,
layout = "davidson-harel")
heat_tree(x, node_label = taxon_names, node_size = n_obs, node_color = n_obs,
layout = "davidson-harel", initial_layout = "reingold-tilford")
# 设定图例标签
heat_tree(x,
node_label = taxon_names,
node_size = n_obs,
node_color = total,
edge_color = n_samples,
node_size_axis_label = "Number of OTUs",
node_color_axis_label = "Number of reads",
edge_color_axis_label = "Number of samples")
#避免重叠,使用overlap_avoidance
heat_tree(x, node_label = taxon_names, node_size = n_obs, node_color = n_obs,
overlap_avoidance = .5)
# 避免标签重叠
# You can modfiy how label scattering is handled using the `replel_force` and
# `repel_iter` options. You can turn off label scattering using the `repel_labels` option.
heat_tree(x, node_label = taxon_names, node_size = n_obs, node_color = n_obs,
repel_force = 2, repel_iter = 20000)
heat_tree(x, node_label = taxon_names, node_size = n_obs, node_color = n_obs,
repel_labels = FALSE)
# 设定尺寸大小范围
# You can force nodes, edges, and lables to be a specific size/color range instead
# of letting the function optimize it. These options end in `_range`.
heat_tree(x, node_label = taxon_names, node_size = n_obs, node_color = n_obs,
node_size_range = c(0.01, .1))
#-修改变的颜色
heat_tree(x, node_label = taxon_names, node_size = n_obs, node_color = n_obs,
edge_color_range = c("black", "#FFFFFF"))
# 修改图例尺寸范围
heat_tree(x, node_label = taxon_names, node_size = n_obs, node_color = n_obs,
node_label_size_range = c(0.02, 0.02))
# 尺寸使用数据转换
# You can change how raw statistics are converted to color/size using options
# ending in _trans.
heat_tree(x, node_label = taxon_names, node_size = n_obs, node_color = n_obs,
node_size_trans = "log10 area")
# 设定尺寸间隔
heat_tree(x, node_label = taxon_names, node_size = n_obs, node_color = n_obs,
node_size_interval = c(10, 100))
根际互作生物学研究室 简介
根际互作生物学研究室是沈其荣院士土壤微生物与有机肥团队下的一个关注于根际互作的研究小组。本小组由袁军副教授带领,主要关注:1.植物和微生物互作在抗病过程中的作用;2 环境微生物大数据整合研究;3 环境代谢组及其与微生物过程研究体系开发和应用。团队在过去三年中在 isme J, Microbiome, PCE,SBB,Horticulture Research等期刊上发表了多篇文章。欢迎关注 微生信生物 公众号对本研究小组进行了解。
团队工作及其成果 (点击查看)
了解 交流 合作
团队负责人邮箱 袁军:
junyuan@njau.edu.cn;
团队成员:文涛:
2018203048@njau.edu.cn
团队公众号:
微生信生物 添加主编微信,或者后台留言;
点击查看团队github主页
点击查看南京农业大学资源于环境科学学院主页
加主编微信 加入群聊
关于微生信生物 你想要的都在这里
猜你喜欢
10000+:菌群分析 宝宝与猫狗 梅毒狂想曲 提DNA发Nature Cell专刊 肠道指挥大脑
文献阅读 热心肠 SemanticScholar Geenmedical
16S功能预测 PICRUSt FAPROTAX Bugbase Tax4Fun
生物科普: 肠道细菌 人体上的生命 生命大跃进 细胞暗战 人体奥秘
写在后面
为鼓励读者交流、快速解决科研困难,我们建立了“宏基因组”专业讨论群,目前己有国内外5000+ 一线科研人员加入。参与讨论,获得专业解答,欢迎分享此文至朋友圈,并扫码加主编好友带你入群,务必备注“姓名-单位-研究方向-职称/年级”。PI请明示身份,另有海内外微生物相关PI群供大佬合作交流。技术问题寻求帮助,首先阅读《如何优雅的提问》学习解决问题思路,仍未解决群内讨论,问题不私聊,帮助同行。
学习16S扩增子、宏基因组科研思路和分析实战,关注“宏基因组”
点击阅读原文