library(biomaRt)
#查看基因组参数
mart = useMart('ensembl')
listDatasets(mart)
#你需要哪个基因组,就复制它在dataset列里的词,放在下面这行的`dataset = `参数里
#此处以人类为例,植物参考注一
bmart <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
dataset = "hsapiens_gene_ensembl",
host = "www.ensembl.org")
# 从输入数据里提取基因名
feature_ids <- rownames(expMatrix)##就是你需要ENSEMBL的例如ENSG00000000003
attributes = c(
"ensembl_gene_id",
#"hgnc_symbol",
"chromosome_name",
"start_position",
"end_position"
)
filters = "ensembl_gene_id"
feature_info <- biomaRt::getBM(attributes = attributes,
filters = filters,
values = feature_ids, mart = bmart)
mm <- match(feature_ids, feature_info[[filters]])
feature_info_full <- feature_info[mm, ]
rownames(feature_info_full) <- feature_ids
# 计算基因的有效长度
eff_length <- abs(feature_info_full$end_position - feature_info_full$start_position)
feature_info_full <- cbind(feature_info_full,eff_length)
eff_length2 <- feature_info_full[,c(1,5)]
x <- expMatrix / eff_length2$eff_length
tpm = t( t(x) / colSums(x) ) * 1e6
count转化为TPM
最新推荐文章于 2024-03-22 11:05:51 发布