准备bam文件、gtf/gff文件
library(Rsubread)
library(limma)
library(edgeR)
library(tidyverse)
library(readr)
sample_name <- read_csv("sample.txt", col_names = FALSE) %>% pull(var = 1) %>% .[1:4] %>% str_sub(1,11)
for (i in 1:length(sample_name)) {
sample = sample_name[i]
bamFile = str_c("~/Program/tpm/",sample,".bam",sep = "")
gtfFile = "~/Program/tpm/flax_trans.gtf"
nthreads <- 16
outFilePref <- "~/Program/tpm/"
outStatsFilePath <- str_c(outFilePref,sample, '.log', sep = '')
outCountsFilePath <- str_c(outFilePref,sample, '.count', sep = '')
fCountsList = featureCounts(bamFile,
annot.ext=gtfFile,
isGTFAnnotationFile=TRUE,
nthreads=nthreads,
isPairedEnd=F)
dgeList = DGEList(counts=fCountsList$counts, genes=fCountsList$annotation)
fpkm = rpkm(dgeList, dgeList$genes$Length)
tpm = exp(log(fpkm) - log(sum(fpkm)) + log(1e6))
write.table(fCountsList$stat, outStatsFilePath, sep="\t", col.names=FALSE, row.names=FALSE, quote=FALSE)
featureCounts = cbind(fCountsList$annotation[,1], fCountsList$counts, fpkm, tpm)
colnames(featureCounts) = c('gene_id', 'counts', 'fpkm','tpm')
write.table(featureCounts, outCountsFilePath, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
}