需求描述
用R代码画出paper里的热图heatmap
使用场景
场景一:sample间的相关性,用来评价生物学重复之间的重复性,或者体现不同组的差异
场景二:基因的表达变化聚类、sample聚类
输入数据
df<-read.table("mRNA.txt",row.names = 1,header = T,as.is = T)
head(df)
## DG_01F DG_01M DG_04F DG_04M DG_10F DG_10M DG_20F DG_20M
## ENSMMUG00000000001 1.19 1.02 0.88 1.10 1.20 1.53 1.29 1.44
## ENSMMUG00000000002 0.20 0.21 0.19 0.25 0.49 0.27 0.07 0.16
## ENSMMUG00000000004 0.00 0.01 0.00 0.01 0.01 0.01 0.01 0.00
## ENSMMUG00000000005 0.76 0.62 0.67 0.29 0.43 0.40 0.53 0.43
## ENSMMUG00000000006 0.07 0.00 0.02 0.00 0.01 0.00 0.02 0.01
## ENSMMUG00000000007 2.05 1.50 1.77 1.53 1.31 1.39 1.33 1.22
## PFC_01F PFC_01M PFC_04F PFC_04M PFC_10F PFC_10M PFC_20F
## ENSMMUG00000000001 1.32 1.47 1.29 1.65 1.83 2.15 2.34
## ENSMMUG00000000002 0.58 0.33 0.52 0.54 0.63 0.47 0.31
## ENSMMUG00000000004 0.00 0.03 0.01 0.00 0.01 0.00 0.00
## ENSMMUG00000000005 0.57 0.54 0.89 0.43 0.92 0.43 0.90
## ENSMMUG00000000006 0.03 0.01 0.04 0.00 0.02 0.07 0.03
## ENSMMUG00000000007 0.95 0.91 1.55 1.64 1.15 0.94 1.63
## PFC_20M PCC_01F PCC_01M PCC_04F PCC_04M PCC_10F PCC_10M
## ENSMMUG00000000001 1.78 1.05 0.82 1.03 1.13 1.27 1.49
## ENSMMUG00000000002 0.40 0.42 0.43 0.57 0.24 0.47 0.61
## ENSMMUG00000000004 0.04 0.00 0.00 0.00 0.00 0.00 0.01
## ENSMMUG00000000005 0.62 0.23 0.53 0.64 0.65 0.73 0.46
## ENSMMUG00000000006 0.00 0.11 0.00 0.01 0.02 0.07 0.00
## ENSMMUG00000000007 1.77 1.10 1.11 1.38 1.49 1.25 0.99
## PCC_20F PCC_20M CA1_01F CA1_01M CA1_04F CA1_04M CA1_10F
## ENSMMUG00000000001 2.04 1.36 1.03 0.91 0.63 0.85 0.77
## ENSMMUG00000000002 0.10 0.40 0.46 0.33 0.30 0.07 0.37
## ENSMMUG00000000004 0.01 0.01 0.00 0.01 0.00 0.00 0.02
## ENSMMUG00000000005 0.93 0.52 0.46 0.36 0.84 0.42 0.50
## ENSMMUG00000000006 0.00 0.00 0.04 0.07 0.00 0.00 0.03
## ENSMMUG00000000007 0.69 1.43 1.27 1.09 1.51 0.80 1.56
## CA1_10M CA1_20F CA1_20M CB_01F CB_01M CB_04F CB_04M
## ENSMMUG00000000001 1.06 1.09 0.66 3.10 2.98 2.64 3.28
## ENSMMUG00000000002 0.24 0.22 0.45 0.22 0.67 0.39 0.26
## ENSMMUG00000000004 0.00 0.00 0.00 0.03 0.02 0.01 0.04
## ENSMMUG00000000005 0.67 0.83 0.54 1.39 1.77 0.99 1.31
## ENSMMUG00000000006 0.06 0.00 0.00 0.07 0.02 0.00 0.05
## ENSMMUG00000000007 0.78 1.55 0.80 17.61 18.98 20.29 17.32
## CB_10F CB_10M CB_20F CB_20M OC_01F OC_01M OC_04F OC_04M
## ENSMMUG00000000001 3.38 4.12 4.01 3.44 1.64 1.24 1.72 1.56
## ENSMMUG00000000002 0.26 0.71 0.46 0.50 0.34 0.35 0.36 0.38
## ENSMMUG00000000004 0.00 0.05 0.00 0.00 0.03 0.00 0.00 0.00
## ENSMMUG00000000005 1.40 1.25 1.67 1.38 0.46 0.55 0.70 0.39
## ENSMMUG00000000006 0.04 0.11 0.01 0.05 0.06 0.05 0.01 0.04
## ENSMMUG00000000007 17.43 16.63 17.32 18.86 8.68 11.42 14.48 11.53
## OC_10F OC_10M OC_20F OC_20M TC_01F TC_01M TC_04F TC_04M
## ENSMMUG00000000001 1.56 1.91 1.80 1.61 1.87 1.57 1.69 1.93
## ENSMMUG00000000002 0.53 0.45 0.42 0.40 0.73 0.59 0.49 0.46
## ENSMMUG00000000004 0.00 0.00 0.00 0.01 0.01 0.01 0.01 0.00
## ENSMMUG00000000005 0.88 0.36 1.17 0.82 0.46 0.75 0.75 0.34
## ENSMMUG00000000006 0.03 0.03 0.06 0.16 0.01 0.01 0.00 0.01
## ENSMMUG00000000007 9.92 8.48 9.87 10.72 9.37 12.27 14.18 13.42
## TC_10F TC_10M TC_20F TC_20M PC_01F PC_01M PC_04F PC_04M
## ENSMMUG00000000001 1.96 2.33 2.24 1.85 1.21 0.88 0.84 1.23
## ENSMMUG00000000002 0.71 0.72 0.56 0.34 0.55 0.14 0.24 0.20
## ENSMMUG00000000004 0.00 0.01 0.01 0.00 0.02 0.00 0.00 0.03
## ENSMMUG00000000005 0.88 0.58 0.99 0.53 0.38 0.40 0.41 0.24
## ENSMMUG00000000006 0.00 0.03 0.10 0.06 0.00 0.00 0.02 0.02
## ENSMMUG00000000007 10.88 11.44 13.19 13.37 1.29 1.39 1.05 1.28
## PC_10F PC_10M PC_20F PC_20M
## ENSMMUG00000000001 1.25 1.27 1.59 1.03
## ENSMMUG00000000002 0.35 0.57 0.34 0.21
## ENSMMUG00000000004 0.00 0.00 0.02 0.00
## ENSMMUG00000000005 0.63 0.30 0.81 0.43
## ENSMMUG00000000006 0.00 0.07 0.00 0.18
## ENSMMUG00000000007 1.62 1.68 1.67 1.47
#查看一共有多少个基因,多少个sample
dim(df)
## [1] 26654 64
开始画图
场景一:sample间的相关性,用来评价生物学重复之间的重复性,或者不同组的差异
#计算每两个sample之间的相关系数,method可选"pearson" (default), "kendall", or "spearman"
cormat<-round(cor(df,method = "spearman"),2)
#install.packages("pheatmap")
library(pheatmap)
pheatmap(cormat,cellwidth = 8, cellheight = 8,fontsize = 8,
color = colorRampPalette(c("#3C7DAF", "#EAF4F1","#FFFCBA", "#E83140"))(20),
show_colnames=T,show_rownames =T,#显示sample的名字
#border_color = "NA",#默认有边框,不要边框的话就加这行
treeheight_row = "0",treeheight_col = "0")#不画树
输出pdf文件,只需加一行:filename=“文件名”
pheatmap(cormat,cellwidth = 8, cellheight = 8,fontsize = 8,
color = colorRampPalette(c("#3C7DAF", "#EAF4F1","#FFFCBA", "#E83140"))(20),
show_colnames=T,show_rownames =T,
#border_color = "NA",#默认有边框,不要边框的话就加这行
treeheight_row = "0",treeheight_col = "0",
filename="Correlation.pdf")
如果sample数量少,还可以在热图里显示数字
只需加一行:display_numbers = TRUE
此处用20个sample展示效果
pheatmap(cormat[4:24,4:24],cellwidth = 15, cellheight = 15,fontsize = 8,
color = colorRampPalette(c("#3C7DAF", "#EAF4F1","#FFFCBA", "#E83140"))(20),
show_colnames=T,show_rownames =T,
display_numbers = TRUE,#显示数字
treeheight_row = "0",treeheight_col = "0")#不画树
有些热图,同组sample用同一个颜色表示,那些色块也是同时画出来的
要为每组设置颜色
#先查看有哪些sample,顺序是怎样的
colnames(df)
## [1] "DG_01F" "DG_01M" "DG_04F" "DG_04M" "DG_10F" "DG_10M" "DG_20F"
## [8] "DG_20M" "PFC_01F" "PFC_01M" "PFC_04F" "PFC_04M" "PFC_10F" "PFC_10M"
## [15] "PFC_20F" "PFC_20M" "PCC_01F" "PCC_01M" "PCC_04F" "PCC_04M" "PCC_10F"
## [22] "PCC_10M" "PCC_20F" "PCC_20M" "CA1_01F" "CA1_01M" "CA1_04F" "CA1_04M"
## [29] "CA1_10F" "CA1_10M" "CA1_20F" "CA1_20M" "CB_01F" "CB_01M" "CB_04F"
## [36] "CB_04M" "CB_10F" "CB_10M" "CB_20F" "CB_20M" "OC_01F" "OC_01M"
## [43] "OC_04F" "OC_04M" "OC_10F" "OC_10M" "OC_20F" "OC_20M" "TC_01F"
## [50] "TC_01M" "TC_04F" "TC_04M" "TC_10F" "TC_10M" "TC_20F" "TC_20M"
## [57] "PC_01F" "PC_01M" "PC_04F" "PC_04M" "PC_10F" "PC_10M" "PC_20F"
## [64] "PC_20M"
#按照sample的顺序,告诉R,它是属于哪个组的
annotation_col = data.frame(
type = factor(rep(c("DG","PFC","PCC","CA1","CB","OC","TC","PC"),c(8,8,8,8,8,8,8,8))))
rownames(annotation_col) = colnames(df)
annotation_row = data.frame(
type = factor(rep(c("DG","PFC","PCC","CA1","CB","OC","TC","PC"),c(8,8,8,8,8,8,8,8))))
rownames(annotation_row) = colnames(df)
#然后给每个组设置颜色
ann_colors = list(
type = c(DG = "blue", PFC = "green", PCC = "red", CA1 = "black",
CB = "pink", OC = "grey", TC = "yellow", PC = "purple")
)
pheatmap(cormat,cellwidth = 8, cellheight = 8,
fontsize = 8,
#display_numbers = TRUE,
color = colorRampPalette(c("navy", "white", "firebrick3"))(20),
show_colnames=F,show_rownames =F,#不显示sample的名字
annotation_col = annotation_col, annotation_row = annotation_row,
annotation_colors = ann_colors,
treeheight_row = "0",treeheight_col = "0",#不画树
border_color = "NA")#不显示边框
场景二:基因的表达变化聚类、sample聚类
library(pheatmap)
#有时会有多种分组方式,那就分别告诉R,此处增加一个性别组
annotation_col = data.frame(
Gender = factor(rep(c("F","M"),32)),#按性别分组
type = factor(rep(c("DG","PFC","PCC","CA1","CB","OC","TC","PC"),c(8,8,8,8,8,8,8,8))))
rownames(annotation_col) = colnames(df)
ann_colors = list(
Gender = c(F = "#FFA42D", M = "#A9D9DF"),#给性别分组设置颜色
type = c(DG = "blue", PFC = "green", PCC = "red", CA1 = "black",
CB = "pink", OC = "grey", TC = "yellow", PC = "purple")
)
#此处用前1000行基因画图
#实际作图时,先筛差异基因,再用差异基因画图;或者用变化大的Top几千个基因画图
pheatmap(df[1:1000,],cellwidth = 8, cellheight = 1, fontsize = 8,
method="spearman", #计算gene或sample之间的相关性的方法,可选"pearson" (default), "kendall", or "spearman"
scale="row", #为基因做scale
cluster_rows=T,#为基因做聚类
cluster_cols=T,#为sample做聚类
color = colorRampPalette(c("navy", "white", "firebrick3"))(20),
show_colnames=F,show_rownames =F,
annotation_col = annotation_col,
annotation_colors = ann_colors,
#treeheight_row = "0",treeheight_col = "0",#不画树
border_color = "NA")
sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] pheatmap_1.0.10
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.17 digest_0.6.15 rprojroot_1.3-2
## [4] plyr_1.8.4 grid_3.5.0 gtable_0.2.0
## [7] backports_1.1.2 magrittr_1.5 scales_0.5.0
## [10] evaluate_0.10.1 stringi_1.2.2 rmarkdown_1.9
## [13] RColorBrewer_1.1-2 tools_3.5.0 stringr_1.3.1
## [16] munsell_0.4.3 yaml_2.1.19 compiler_3.5.0
## [19] colorspace_1.3-2 htmltools_0.3.6 knitr_1.20