# 注意:该方法需要"魔法"(即代理/VPN 网络环境)才能访问 Ensembl BioMart;脚本已经过调试,可以成功运行。
# 输入:转录本 ID(test_id 列),样式如 ENSMUST00000040608.4(带版本号),以 xlsx 文件形式储存。
library(Biostrings)
library(biomaRt)
library(openxlsx)
library(seqinr)
# Load the transcript IDs to look up and open the BioMart connection.
# The workbook is expected to have a `test_id` column with versioned Ensembl
# transcript IDs (e.g. ENSMUST00000040608.4).
# The script deliberately does not call `rm(list = ls())`: wiping the caller's
# workspace is a hidden side effect and does not give a clean session anyway.
genelist <- read.xlsx("待测基因testID汇总.xlsx")
if (!"test_id" %in% names(genelist)) {
  stop("Column 'test_id' not found in the input workbook.", call. = FALSE)
}
genelist <- as.character(genelist[["test_id"]])
# Strip the version suffix: keep only the text before the first ".".
genelist <- sub("\\..*$", "", genelist)
# Blank cells come through as NA / ""; they cannot be looked up, so drop them.
genelist <- genelist[!is.na(genelist) & nzchar(genelist)]
mart <- useMart("ensembl", dataset = "mmusculus_gene_ensembl") # mouse database
#' Fetch the cDNA sequence for one Ensembl transcript ID.
#'
#' Prints a progress line, then queries BioMart through the `mart` object
#' that must already exist in the calling environment (it is not a parameter).
#'
#' @param x Transcript ID, passed as `id` to `biomaRt::getSequence()` with
#'   `type = "ensembl_transcript_id"`.
#' @return The `getSequence()` result as a data.frame, with an extra integer
#'   column `Length` holding the character count of the first column.
Ensemble_to_seq <- function(x) {
  cat("Getting sequence for gene", x, "\n") # progress message
  seq_df <- biomaRt::getSequence(
    id = x,
    type = "ensembl_transcript_id",
    seqType = "cdna",
    mart = mart
  )
  seq_df <- as.data.frame(seq_df)
  # nchar() is vectorised, so Length becomes a plain integer column rather
  # than the list column that lapply() would create (list columns break
  # downstream writers and arithmetic).
  seq_df$Length <- nchar(as.character(seq_df[[1]]))
  seq_df
}
# Fetch every transcript and stack the per-ID results into one table.
# This step needs network access to Ensembl BioMart (the original note says a
# proxy/VPN is required).
# Results are collected in a list and bound once: calling rbind() inside the
# loop re-copies the growing data frame on every iteration.
seq_tables <- lapply(genelist, Ensemble_to_seq)
outTab <- if (length(seq_tables) > 0) {
  do.call(rbind, seq_tables)
} else {
  data.frame() # no IDs: keep the empty-table result
}
# Save the sequences to "output.fa" in FASTA format, one record per row of
# `outTab`. The header of each record is the transcript ID.
# seq_len() is used instead of 1:nrow() so that an empty table writes nothing
# rather than iterating over c(1, 0).
for (i in seq_len(nrow(outTab))) {
  # Sequence of the current record
  seq <- DNAStringSet(outTab$cdna[i])
  # Append this record to the output file.
  # NOTE(review): open = "a" means re-running the script adds the records to
  # an existing output.fa again -- delete the file first if that is unwanted.
  write.fasta(seq, names = outTab$ensembl_transcript_id[i], file.out = "output.fa", open = "a")
}
# 最后是输出结果展示(序列已写入 output.fa)。