rm(list = ls())
####step1:聚类样本####
#BiocManager::install("WGCNA",force = TRUE)
library("WGCNA")
load("WGCNA.Rdata")
WGCNA_matrix <- fpkm
WGCNA_matrix = t(fpkm[order(apply(fpkm,1,mad), decreasing = T)[1:10000],])
datExpr <- WGCNA_matrix
sampleNames = rownames(datExpr)
sampleTree = hclust(dist(datExpr), method = "average")
sizeGrWindow(12,9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="",
xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
abline(h = 80, col = "red")#改,得到合适的h值
clust = cutreeStatic(sampleTree, cutHeight = 80, minSize = 10)#cutHeight和前一行h值相同
table(clust)
keepSamples = (clust==1)
datExpr = datExpr[keepSamples, ]
sampleTree = hclust(dist(datExpr), method = "average")
sizeGrWindow(12,9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="",
xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
###如果觉得聚类效果不好可以重来一遍###
####建立加权基因共表达网络,得到关联性较强的基因模块####
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
powers = c(c(1:10), seq(from = 12, to=20, by=2))
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
par(mfrow = c(1,2));
cex1 = 0.9#一般设置为0.85或者0.9,目的是调节网络的关联性,越大关联性越好
png(file = "SoftThreshold.png",width = 800, height = 600)#可调形参如res(分辨率),width(宽度),height(高度)等
par(mfrow = c(1,2))
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
abline(h=0.9,col="red")#如果调0.9的话这里也要改成h=0.9
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
dev.off()
power = sft$powerEstimate
power#这一步会得到系统推荐的power值,得到后后面的power值改动即可
net = blockwiseModules(
datExpr,
power = 7,#power值改
maxBlockSize = 6000,
TOMType = "unsigned", minModuleSize = 100,
reassignThreshold = 0, mergeCutHeight = 0.25,
numericLabels = TRUE, pamRespectsDendro = FALSE,
saveTOMs = TRUE,
saveTOMFileBase = "AS-green-FPKM-TOM",
verbose = 3
)
table(net$colors)
png(file = "moduleCluster.png", width = 1200, height = 800)
mergedColors = labels2colors(net$colors)
plotDendroAndColors(dendro = net$dendrograms[[1]],
colors = mergedColors[net$blockGenes[[1]]],
groupLabels = "Module colors",
dendroLabels = FALSE,
hang = 0.03,
addGuide = TRUE,
guideHang = 0.05,
main = "Gene dendrogram and module colors")
dev.off()
####筛选和表型相关的模块####
MEs = moduleEigengenes(datExpr, mergedColors)$eigengenes
MEs = orderMEs(MEs)
datTraits <- clinical#注意clinical和MEs的样本名称完全一致
moduleTraitCor=cor(MEs, datTraits, use="p")
write.table(file="Step04-modPhysiological.cor.xls",moduleTraitCor,sep="\t",quote=F)
moduleTraitPvalue=corPvalueStudent(moduleTraitCor, nSamples)
write.table(file="Step04-modPhysiological.p.xls",moduleTraitPvalue,sep="\t",quote=F)
png(file="Module_trait_relationships.png",width=800,height=900)
textMatrix=paste(signif(moduleTraitCor,2),"\n(",signif(moduleTraitPvalue,1),")",sep="")
dim(textMatrix)=dim(moduleTraitCor)
labeledHeatmap(Matrix=moduleTraitCor,
xLabels=colnames(datTraits),
yLabels=names(MEs),
ySymbols=names(MEs),
colorLabels=FALSE,
colors=blueWhiteRed(50),
textMatrix=textMatrix,
setStdMargins=TRUE,
cex.text=1.2,
cex.lab=0.9,
zlim=c(-1,1),
main=paste("Module-trait relationships"))
dev.off()
####得到和表型相关的模块####
HF_stage = as.data.frame(datTraits$HF)#改
names(HF_stage) = "heart_failure"
modNames = substring(names(MEs), 3)
geneModuleMembership = as.data.frame(cor(datExpr, MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
names(geneModuleMembership) = paste("HF", modNames, sep="");
names(MMPvalue) = paste("p.HF", modNames, sep="");
geneTraitSignificance = as.data.frame(cor(datExpr, HF_stage, use = "p"));
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
names(geneTraitSignificance) = paste("GS.", names(HF_stage), sep="")
names(GSPvalue) = paste("p.GS.", names(HF_stage), sep="")
module = "brown"#改成你想要的模块
column = match(module, modNames);
moduleColors=mergedColors
moduleGenes = moduleColors==module;
png(file="Module_membership_vs_gene_significance.png")
par(mfrow = c(1,1));
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneTraitSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Gene significance for HF",
main = paste("Module membership vs. gene significance\n"),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
dev.off()
brown_gene <- colnames(datExpr)[moduleGenes]
save(brown_gene,file = "brown_gene.Rdata")
dev.off()
输入文件有两个:
一:
二:
常见的问题:
1:输入文件中为什么基因表达文件的样本和表型文件的样本不一致?
答:因为有些样本聚类效果较差需要剔除,这也就是我们第一步所需要做的事情,提出之后的样本就和表型文件的样本一致了。(顺序需要自己排成一致,intersect函数即可)
2:为什么有时候做出来的瀑布图不好看?
答:两方面原因,一方面输入文件之中的基因表达文件包含信息过少,只有几百个基因,另一方面cex值可以调小些也可以解决。
3:输出图形时为什么感觉画面很模糊,想要调清晰怎样办?
答:可以调形参,如 png(file = "moduleCluster.png", width = 1200, height = 800) ,可以调节为png(file = "moduleCluster.png",width=8000,height=6000,res=300)或者其他。
4:最后我要得到和表型相关的模块时候,只需要改HF_stage = as.data.frame(datTraits$HF)就可以了吗?
答:是的,不过在调节形参时候你还需要改相应的x轴标题和y轴标题
5:在得到和表型相关的模块之后(即最后一幅图),我想要在图中加两条线代表筛选相交基因怎么解决?
答:verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneTraitSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Gene significance for HF",#改成你想要的x轴标题和y轴标题
main = paste("Module membership vs. gene significance\n"),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
在这个后面可以加上一个:abline(h=?,v=?,)即可。
6:本文代码数据均来自于互联网,本人仅用于分享和补充,无盈利行为!