获得module丰度信息_m00356-CSDN博客

本文链接：https://blog.csdn.net/weixin_42480153/article/details/114377902

本文介绍了一种通过整合基因注释与转录组数据来计算特定微生物代谢途径模块丰度的方法。该方法首先从基因集注释文件中筛选出感兴趣的代谢模块，然后将其与样品的DNA或RNA丰度数据进行匹配，最终计算出各模块的总丰度。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

#获得微生物代谢相关module的丰度信息
#DNA水平module
library(data.table)
library(dplyr)
fread("/mnt/mzy/dairycow/72samples/geneset/annotation.emapper.annotations",header=F,sep="\t",fill=T)->a
list=c("M00004", "M00006", "M00007", "M00008", "M00033", "M00165", "M00166", "M00167", "M00168", "M00169", "M00170", "M00171", "M00172", "M00173", "M00174", "M00175", "M00176", "M00308", "M00309", "M00344", "M00345", "M00346", "M00356", "M00357", "M00358", "M00373", "M00374", "M00375", "M00376", "M00377", "M00378", "M00418", "M00419", "M00422", "M00528", "M00529", "M00530", "M00531", "M00533", "M00534", "M00537", "M00538", "M00539", "M00540", "M00541", "M00543", "M00544", "M00545", "M00547", "M00548", "M00550", "M00551", "M00563", "M00567", "M00568", "M00569", "M00579", "M00580", "M00595", "M00596", "M00608", "M00620", "M00622", "M00623", "M00624", "M00636", "M00637", "M00638", "M00740", "M00804", "M00810", "M00811", "M00836", "M00878", "M00915", "M00919")
fread("/mnt/mzy/dairycow/72samples/profile/salmonDNA.csv",sep=",",header=T,fill=T)->abd
colnames(abd)[-1]->tmp
#循环开始

for (i in 1:length(list)){
#system(paste("grep ",list[i]," annotation.emapper.annotations|cut -f1>tmp.names",sep="")) #用paste以后就不用加引号了
#fread("tmp.names",header=F)->b
a[grep(list[i],as.matrix(a[,11])),1]->b #grep检索list必须转换为数据框或者矩阵,以上内容可以用这个来解决
if(dim(b)[1]>0){
left_join(b,abd,by=c("V1"="V1"))->c
c[is.na(c)]<-0 #防止有注释没丰度导致的NA
colSums(c[,2:dim(c)[2]])->d
data.frame(tmp,d)->tmp
colnames(tmp)[dim(tmp)[2]]<-list[i]
print(i)
print(date())
}
}
write.csv(tmp,"moduleDNA.csv",col.names=T,row.names=F,quote=F)

#RNA水平module
fread("/mnt/mzy/dairycow/72samples/profile/salmonRNA.csv",sep=",",header=T,fill=T)->abd
colnames(abd)[-1]->tmp
#循环开始
for (i in 1:length(list)){
#system(paste("grep ",list[i]," annotation.emapper.annotations|cut -f1>tmp.names",sep="")) #用paste以后就不用加引号了
#fread("tmp.names",header=F)->b
a[grep(list[i],as.matrix(a[,11])),1]->b #grep检索list必须转换为数据框或者矩阵,以上内容可以用这个来解决
if(dim(b)[1]>0){
left_join(b,abd,by=c("V1"="V1"))->c
c[is.na(c)]<-0
colSums(c[,2:dim(c)[2]])->d
data.frame(tmp,d)->tmp
colnames(tmp)[dim(tmp)[2]]<-list[i]
print(i)
print(date())
}
}
write.csv(tmp,"moduleRNA.csv",col.names=T,row.names=F,quote=F)