经过一段时间的沉淀,把文章内容反复看了一两遍,才稍微理解了其中的一些含义;至于算法结构之类的内容,由于数学基础不够好,仍然有些不懂。
library(WGCNA)
options(stringsAsFactors = FALSE)
# Load the tab-separated expression table; the first row supplies the
# column names.
myData <- read.table("new.txt", header = TRUE, sep = "\t")

# Quick look at the table's dimensions and column names.
dim(myData)
names(myData)

# Drop the first column and transpose what remains, so that each original
# column becomes a row of datExpr and each original row becomes a column.
datExpr <- as.data.frame(t(myData[, -1]))

# Columns are labelled with the values of the inputID column; rows are
# labelled with the remaining column names of the input table.
colnames(datExpr) <- myData$inputID
rownames(datExpr) <- names(myData)[-1]
# Run WGCNA's goodSamplesGenes() check on the expression data. The result
# is used below through its logical masks `goodGenes` (one entry per
# column of datExpr) and `goodSamples` (one entry per row), and its
# overall `allOK` flag.
gsg <- goodSamplesGenes(datExpr, verbose = 3)

# Record the names of everything that will be dropped BEFORE filtering:
# the masks refer to the unfiltered datExpr, so indexing the filtered
# table with them would return the wrong names (or NA). Genes are columns,
# samples are rows.
removedGenes <- names(datExpr)[!gsg$goodGenes]
removedSamples <- rownames(datExpr)[!gsg$goodSamples]

if (!gsg$allOK) {
  if (length(removedGenes) > 0) {
    printFlush(paste("Removing genes:", paste(removedGenes, collapse = ", ")))
  }
  if (length(removedSamples) > 0) {
    printFlush(paste("Removing samples:", paste(removedSamples, collapse = ", ")))
  }
  # drop = FALSE keeps datExpr a data frame even if only one gene survives.
  datExpr <- datExpr[gsg$goodSamples, gsg$goodGenes, drop = FALSE]
}

# Make sure the output directory exists before writing into it.
dir.create("Out", showWarnings = FALSE)

# Export the removed gene and sample names, one per line (the files are
# empty when nothing was removed).
write.table(removedGenes, file = "Out/removeGene.xls",
            row.names = FALSE, col.names = FALSE, quote = FALSE)
write.table(removedSamples, file = "Out/removeSample.xls",
            row.names = FALSE, col.names = FALSE, quote = FALSE)
# Build a sample tree by average-linkage hierarchical clustering of the
# distances between the rows of datExpr. hclust() is used instead of
# flashClust(): the flashClust package is not loaded anywhere in the
# visible script, whereas hclust() ships with R's stats package.
sampleTree <- hclust(dist(datExpr), method = "average")

# Make sure the output directory exists before opening the PDF device.
dir.create("Out", showWarnings = FALSE)

# Draw the dendrogram into a PDF; the device is closed even if plotting
# fails, so no graphics device is left open.
pdf(file = "Out/sampleClustering.pdf", width = 12, height = 9)
tryCatch(
  {
    par(cex = 0.6)
    par(mar = c(0, 4, 2, 0))
    plot(sampleTree, main = "Sample clustering", sub = "", xlab = "",
         cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
  },
  finally = dev.off()
)

# Persist the expression data for later steps of the analysis.
save(datExpr, file = "dataInput.RData")
library(WGCNA)
options(stringsAsFactors = FALSE)
ena