这是一个很简单的步骤,但是我每次遇到这个问题都得去网上搜教程,所以我把它单独列为一章。
代码如下:
先设置工作路径
# Set the working directory; the relative path 'noncoding.csv' written further down resolves against it.
setwd("C:\\Users\\DI\\Desktop\\PRJNA516151_lnc\\lncRNA Predicte")
把原始的txt文件转换成更方便的csv格式文件
# Convert the tab-separated prediction tables to CSV.
pred_dir <- "C:\\Users\\DI\\Desktop\\PRJNA516151_lnc\\lncRNA Predicte"

# Read the txt files. header = TRUE keeps the column names, because the
# intersection step further down accesses a column called TranscriptID.
# NOTE(review): assumes the first line of each file is a header row - confirm.
dat_CPC2 <- read.table(
  file.path(pred_dir, "CPC2.txt"),
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
# NOTE(review): this previously read CPC2.txt a second time, so both CSVs held
# CPC2 results. The CNCI table is assumed to be CNCI.txt in the same folder -
# confirm the file name.
dat_CNCI <- read.table(
  file.path(pred_dir, "CNCI.txt"),
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)

# Write each table to the CSV named after its own tool.
write.csv(dat_CNCI, file.path(pred_dir, "CNCI.csv"), row.names = FALSE)
write.csv(dat_CPC2, file.path(pred_dir, "CPC2.csv"), row.names = FALSE)

# Read the ncRNA txt file.
dat_noncoding <- read.table(
  "C:\\Users\\DI\\Desktop\\lncRNA\\noncoding.txt",
  sep = "\t", stringsAsFactors = FALSE
)
# Convert the ncRNA table to CSV (relative path: lands in the working directory).
write.csv(dat_noncoding, "noncoding.csv", row.names = FALSE)
将各预测软件(CNCI、CPC2)结果中的noncoding转录本ID取交集
library(dplyr)

# Intersect the transcript IDs reported by the prediction tools and save the
# shared IDs as lncRNA.csv, which the extraction step reads back in.
pred_dir <- "C:\\Users\\DI\\Desktop\\PRJNA516151_lnc\\lncRNA Predicte"
data_CNCI <- read.csv(file.path(pred_dir, "CNCI.csv"))
data_CPC2 <- read.csv(file.path(pred_dir, "CPC2.csv"))

# Fail early with a clear message if the ID column is missing; otherwise `$`
# returns NULL and the intersection is silently empty.
stopifnot(
  "TranscriptID" %in% names(data_CNCI),
  "TranscriptID" %in% names(data_CPC2)
)

result1 <- intersect(data_CNCI$TranscriptID, data_CPC2$TranscriptID)

# A PLEK table is never loaded in this script, so only narrow the set further
# when the caller has created data_PLEK beforehand; otherwise keep the
# two-tool intersection.
if (exists("data_PLEK")) {
  result2 <- intersect(data_PLEK$TranscriptID, result1)
} else {
  result2 <- result1
}

# Persist the final ID list, one ID per row.
# NOTE(review): writes result2 (identical to result1 unless data_PLEK exists).
write.csv(
  data.frame(TranscriptID = result2),
  file.path(pred_dir, "lncRNA.csv"),
  row.names = FALSE, quote = FALSE
)
提取表达矩阵中的lncRNA
# Keep only the lncRNA rows of the count matrix, the length table and the
# differential-expression table, then write each subset to its own CSV.
proj_dir <- "C:\\Users\\DI\\Desktop\\PRJNA516151_lnc"

lncRNA <- read.csv(file.path(proj_dir, "lncRNA Predicte", "lncRNA.csv"))
# The ID list is a single-column file, so take the first column whatever its
# header is called.
lncRNA_ids <- lncRNA[[1]]

# Select rows by transcript ID. Indexing with rownames(lncRNA) would use the
# automatic row names "1".."n" and simply return the first n rows.
# NOTE(review): assumes the first column of every table holds the transcript
# IDs - confirm against the actual files.
keep_lncRNA_rows <- function(tbl) {
  hit <- tbl[[1]] %in% lncRNA_ids
  if (!any(hit)) {
    warning("no rows matched the lncRNA ID list", call. = FALSE)
  }
  tbl[hit, , drop = FALSE]
}

data_Gastrocnemius_transcript_count_matrix <- read.csv(
  file.path(proj_dir, "count", "Gastrocnemius_transcript_count_matrix.csv")
)
lncRNA_transcript_count_matrix <-
  keep_lncRNA_rows(data_Gastrocnemius_transcript_count_matrix)
write.csv(
  lncRNA_transcript_count_matrix,
  file.path(proj_dir, "lncRNA_Gastrocnemius_transcript_count_matrix.csv"),
  row.names = FALSE
)

# Length table: kept in its own variable so the *_transcript_length.csv file
# really contains lengths.
length_CNCI <- read.csv(file.path(proj_dir, "gene_length.csv"))
length_CNCI <- keep_lncRNA_rows(length_CNCI)
write.csv(
  length_CNCI,
  file.path(proj_dir, "lncRNA_Gastrocnemius_transcript_length.csv"),
  row.names = FALSE
)

# Differential-expression table: named all_diff so it does not mask
# base::diff, and no longer overwrites length_CNCI.
all_diff <- read.csv(
  file.path(proj_dir, "all_diff_Gastrocnemius_transcript_count_matrix.csv")
)
lncRNA_diff <- keep_lncRNA_rows(all_diff)
# NOTE(review): output file name chosen during review - rename if needed.
write.csv(
  lncRNA_diff,
  file.path(
    proj_dir, "lncRNA_all_diff_Gastrocnemius_transcript_count_matrix.csv"
  ),
  row.names = FALSE
)
通过上述步骤就可以把lncRNA从原始的表达矩阵中筛选出来,以便于后续lncRNA的研究