#BiocManager::install('GenomicFeatures')
library(GenomicFeatures)
# Build a transcript database (TxDb) from the GENCODE GTF annotation.
# NOTE(review): vM27 looks like a mouse GENCODE release, while the CSV written
# below is named "GRCh38_..." (a human assembly) -- confirm which is intended.
txdb <- makeTxDbFromGFF("./gencode.vM27.annotation.gtf", format = "gtf")

# Per-gene exonic length ----
# Use exonsBy() to get the start/end of every exon of each gene, merge
# overlapping (redundant) exon ranges with reduce(), then sum the widths.
exons_gene <- exonsBy(txdb, by = "gene")

# reduce(), width() and sum() are vectorised over the list of genes, so the
# whole computation is done in one pass instead of one reduce() call per gene.
gene_length <- sum(width(reduce(exons_gene)))

# Kept as a named list (one length per gene ID) so that any later code that
# indexes `exons_gene_lens` with [[ ]] keeps working.
exons_gene_lens <- as.list(gene_length)

# One row per gene, gene ID as row name, a single "length" column. Building it
# directly avoids creating a data frame with one column per gene and keeps the
# gene IDs exactly as they appear in the annotation.
exons_gene_2_lens <- data.frame(
  length = as.integer(gene_length),
  row.names = names(gene_length)
)

# Quick look at the first rows of the "length" column.
head(exons_gene_2_lens)[1]

write.csv(exons_gene_2_lens, "GRCh38_exon_length2.csv", row.names = TRUE)
library(refGenome)
# Create an Ensembl annotation object and load the GTF file into it.
ens <- ensemblGenome()
# NOTE: GTF files from NCBI or GENCODE cannot be read this way; R crashes.
read.gtf(ens, "Homo_sapiens.GRCh38.104.gtf")  # import the GTF file; slow
class(ens)

# Extract the gene positions from the loaded annotation.
my_gene <- getGenePositions(ens)

# Save the gene positions. Previously `my_gene` was never written out and
# `exons_gene_2_lens` was written a second time to the file it had already
# been saved to earlier in this script.
# NOTE(review): the output file name is a suggestion -- adjust as needed.
write.csv(my_gene, "GRCh38_gene_positions.csv", row.names = TRUE)