读取表达数据,列名是样本名
DEG_expression <- read.csv("DEG_expression.csv",row.names = 1,header = T)
DEG_expression <- t(DEG_expression)
相关性分析
#进行相关性分析#
gene_cor<- cor(DEG_expression)
#绘制热图#
library(pheatmap)
pheatmap(gene_cor,show_rownames = F,show_colnames = F)
p值计算
#相关性p值计算,6为样本数量#
library(WGCNA)
geneCor.p <- corPvalueStudent(gene_cor,6)
#将两个基因的相关性与r值和p值制成表格#
r <- as.matrix(gene_cor)
geneCor.p <- as.matrix(geneCor.p)
x <- matrix(NA,ncol = 4,nrow = 27889) #27889为基因的平方#
#填充基因#
for(i in 1:167){
j <- 1
while(j<168)
{ x[(i-1)*167+j,1] <- row.names(r)[i]
x[(i-1)*167+j,2] <- colnames(r)[j]
j <- j+1
}
}
#填充r与p#
for(i in 1:dim(x)[1]){
x[i,3] <- r[x[i,1],x[i,2]]
x[i,4] <- geneCor.p[x[i,1],x[i,2]]
}
colnames(x) <- c("gene","gene","PCC","PCCP")
x <- as.data.frame(x)
#删除相关性值为1的数据#
x <- x[-which(x$PCC==1),]
#按PCC指进行排序#
x <- x[order(x$PCC),]
#去除相关性重复的数据#
y <- matrix(NA,ncol = 4,nrow = 13861)
y <- as.data.frame(y)
j <- 1
for (i in 1:(dim(x)[1]/2)) {
y[j,] <-x[2*i-1,]
j <- j+1
}
colnames(y)<- colnames(x)
write.csv(y,file = "generp_qc.csv")
代码简洁
相关性及p值计算之后的代码简洁版
library(tidyverse)
#相关性表格宽变长
gene_cor <- as.data.frame(gene_cor)
gene_cor %>%mutate(gene1= rownames(gene_cor)) -> gene_cor
rownames(gene_cor)=NULL
gene_cor %>% pivot_longer(-gene1,names_to = "gene2",values_to = "r") -> gene_cor
#p值表格宽变长
geneCor.p <- as.data.frame(geneCor.p)
geneCor.p %>%mutate(gene1= rownames(geneCor.p)) -> geneCor.p
rownames(geneCor.p)=NULL
geneCor.p %>% pivot_longer(-gene1,names_to = "gene2",values_to = "p") ->geneCor.p
#两个表格合并
gene_cor %>% full_join(geneCor.p, by = c("gene1","gene2")) ->x
# 去除相关性为1的行
x %>% filter(gene1!=gene2) -> x
#去除相关性重复的数据
x %>% distinct(r,.keep_all = TRUE) -> x