1. 在 Linux 系统中生成 quant.sf 文件,此步骤见
《RNA-seq 的上游分析及数据处理》
2. 在 R 中导入数据,并筛除计数为 0、重复以及低质量的数据
# Import Salmon quantification results and build an integer count table.
# fread() and tximport() are expected to be attached elsewhere in the
# session (presumably data.table and tximport -- confirm).
setwd("/RNASEQ/quant")

# Mapping table read without a header row, returned as a plain data.frame.
t2s <- fread("t2s_h38_ensembl.txt", data.table = FALSE, header = FALSE)
head(t2s)

# `pattern` is a regular expression, not a shell glob: escape the dot and
# anchor at the end so only files whose name ends in "quant.sf" are matched.
files <- list.files(
  pattern = "quant\\.sf$",
  recursive = TRUE,
  full.names = TRUE
)
files # show every matching file under the working directory
if (length(files) == 0L) {
  stop("no quant.sf files found under ", getwd(), call. = FALSE)
}

# NOTE(review): with txOut = TRUE the output presumably stays at transcript
# level and tx2gene is not used for summarisation -- confirm this is intended.
txi <- tximport(files, type = "salmon", tx2gene = t2s, txOut = TRUE)

# Sample name = name of the directory that directly contains each quant.sf,
# with the "_quant" suffix stripped.
cn <- basename(dirname(files))
cn
colnames(txi$counts) <- gsub("_quant", "", cn)
colnames(txi$counts)

# Coerce the counts to integer while keeping row and column names.
# The coercion truncates toward zero (same as as.integer()).
# NOTE(review): consider round() first if nearest-integer counts are wanted.
counts <- txi$counts
storage.mode(counts) <- "integer"
counts <- as.data.frame(counts)
结果如下:
3. 然后进行 DESeq2 差异表达分析,获取 logFC、pvalue、padj
# Placeholder sample labels: replace them with the names of your own samples
# (one entry per column of `counts`).
name_list <- c("geneid","symbol","symbol","symbol","symbol","symbol")

# Sample sheet keyed by the current column names of `counts`; the group
# column starts out as a copy of the sample names.
nlgl <- data.frame(
  name_list = name_list,
  group_list = name_list,
  row.names = colnames(counts)
)

# Open the sample sheet in the interactive editor so names and groups can be
# adjusted by hand; the edited table is stored back into `nlgl`.
fix(nlgl)

# Read the edited values back and relabel the count columns with them.
group_list <- nlgl$group_list
name_list <- nlgl$name_list
colnames(counts) <- name_list
gl <- data.frame(row.names=