# 问题:如果对样本之间的差异感兴趣,则需要对表达矩阵(行为基因,列为样本)按行进行标准化。
# 解答:对的,应按行(基因)标准化。下面尝试3种方法:第1种(自定义函数)和第3种(pheatmap中设置scale = "row")结果一致;第2种直接调用scale函数,而scale函数是按列(样本)标准化的,因此结果与另外两种不同。
## pheatmap() is called further down; attach the package up front so the
## script fails immediately, with a clear message, if it is not installed.
library(pheatmap)

## Expression table: rows are genes, columns are samples. The first line of
## the file is the header and the first column supplies the row names.
mat <- read.table("expression_data.txt", header = TRUE, row.names = 1)
## Custom standardization helper.
## Standardizes every row of `x` to mean 0 and standard deviation 1
## (a row-wise z-score). NA values are ignored when computing the row
## statistics and stay NA in the result.
##   x: numeric matrix or data frame.
##   returns: an object with the same dimensions as `x` holding the z-scores.
## A row whose standard deviation is 0 is divided by zero, so its non-missing
## entries come back as NaN.
scale_rows <- function(x) {
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  row_means <- apply(x, 1, mean, na.rm = TRUE)
  row_sds <- apply(x, 1, sd, na.rm = TRUE)
  # Both vectors hold one value per row; R recycles them down each column,
  # so every element is paired with the statistics of its own row.
  (x - row_means) / row_sds
}
## Test 1: row-wise z-scores computed by the custom helper.
a <- scale_rows(mat)

## Test 2: base scale(). It centers and scales each COLUMN (sample), not each
## row (gene), so this heatmap is expected to differ from tests 1 and 3.
b <- scale(mat, center = TRUE, scale = TRUE)

## Colour scale shared by all three heatmaps.
## Breaks are evenly spaced every 0.01 on [-4, 4]. The previous vector jumped
## from -0.1 straight to 0, which produced one bin ten times wider than the
## others and shifted the white midpoint away from 0.
bk <- seq(-4, 4, by = 0.01)
## pheatmap expects `breaks` to be one element longer than `color`, so build
## exactly length(bk) - 1 colours: half blue -> white, half white -> red.
n_half <- (length(bk) - 1) %/% 2
heat_colors <- c(
  colorRampPalette(colors = c("blue", "white"))(n_half),
  colorRampPalette(colors = c("white", "red"))(n_half)
)
legend_ticks <- seq(-4, 4, 2)

p1 <- pheatmap(a,
  scale = "none",
  color = heat_colors,
  legend_breaks = legend_ticks,
  breaks = bk
)
p2 <- pheatmap(b,
  scale = "none",
  color = heat_colors,
  legend_breaks = legend_ticks,
  breaks = bk
)

## Test 3: let pheatmap standardize the rows itself via scale = "row".
p3 <- pheatmap(mat,
  scale = "row",
  color = heat_colors,
  legend_breaks = legend_ticks,
  breaks = bk
)