R语言画热图的方法很多,需要注意聚类方法和距离函数的选择、数据是否归一化、是否含有异常值等。
1. 生成演示数据
# Simulated expression matrix: 200 standard-normal draws arranged as
# 20 genes (rows) x 10 samples (columns).
exprs_matrix <- matrix(rnorm(200), nrow = 20)
dimnames(exprs_matrix) <- list(
  paste0("gene", seq_len(20)),
  paste0("sample", seq_len(10))
)
head(exprs_matrix)

# Sample annotation table: row names are the sample names, each column is
# one grouping variable (4 normal + 6 control; 8 in group1 + 2 in group2).
sample_anno <- data.frame(
  class = rep(c("normal", "control"), times = c(4, 6)),
  group = rep(c("group1", "group2"), times = c(8, 2)),
  row.names = colnames(exprs_matrix)
)
2. 内置heatmap函数绘制热图
# Base-R heatmap: cells use a 64-step red-white-blue palette; the bar above
# the columns gets one colour per sample from a green-black-red gradient.
n_col <- ncol(exprs_matrix)
cell_palette <- colorRampPalette(c("red", "white", "blue"))(64)
sample_bar <- colorRampPalette(c("green", "black", "red"))(n_col)
heatmap(exprs_matrix, col = cell_palette, ColSideColors = sample_bar)
# 主要参数:
# col:指定热图所用颜色
#
# ColSideColors/RowSideColors:列/行边上颜色条(color bar)所用的颜色向量
#
# Colv/Rowv:是否按照列/行聚类并重排,默认均进行聚类
#
# cexCol/cexRow:分别表示列/行标签字体大小
#
# scale=c("row","column","none"):设置是否按行/列归一化(默认为 "row")
#
# margins:设置热图下方及右方的边距宽度
3. pheatmap包绘制热图
library(pheatmap)

# Heatmap with pheatmap's default settings.
pheatmap(exprs_matrix)

# Adjust dendrogram heights: taller row tree (120), shorter column tree (20).
pheatmap(exprs_matrix, treeheight_row = 120, treeheight_col = 20)

# Same layout, but hide the column (sample) names.
# Use FALSE rather than F: F is an ordinary variable that can be reassigned.
pheatmap(exprs_matrix,
  treeheight_row = 120,
  treeheight_col = 20,
  show_colnames = FALSE
)

# Annotate samples with their class/group information.
# `annotation` is a deprecated alias; `annotation_col` is the supported
# argument for column (sample) annotations. Row names of the annotation
# data frame must match the column names of the matrix.
pheatmap(exprs_matrix, annotation_col = sample_anno)
# 主要参数
# cellwidth、cellheight:小方格宽度、高度
# scale:("row","column","none") 是否按行/列归一化
# cluster_rows、cluster_cols:是否按行、列聚类
# treeheight_row、treeheight_col:行、列聚类树的高度
# annotation_col、annotation_row:加上列(样品)、行的分组信息(annotation 为已弃用的旧参数)
# display_numbers=TRUE:在小方格中显示数字
4. gplots包绘制热图
library(gplots)

# Mock row side bar: one rainbow colour per gene.
rc <- rainbow(nrow(exprs_matrix), start = 0, end = .3)

# Map each sample's class to a side-bar colour with an explicit lookup
# table, instead of relying on the alphabetical order of factor levels.
# The resulting colours are unchanged: control -> red, normal -> blue.
sample_class <- rep(c("normal", "control"), times = c(4, 6))
class_colors <- c(control = "red", normal = "blue")
ss <- factor(unname(class_colors[sample_class]), levels = c("red", "blue"))

heatmap.2(
  exprs_matrix,
  col = colorRampPalette(c("red", "white", "blue"))(64),
  # Full argument names: `hclust=` / `margin=` only worked through partial
  # argument matching of `hclustfun` / `margins`.
  hclustfun = function(x) hclust(x, method = "ward.D2"),
  distfun = function(x) dist(x, method = "euclidean"),
  scale = "row",
  dendrogram = "column", # draw only the column dendrogram
  key = TRUE, keysize = 1.5, symkey = FALSE,
  density.info = "none", # one of "histogram", "density", "none"
  # add.expr could be supplied here to draw extra elements on the image
  RowSideColors = rc,
  ColSideColors = as.vector(ss),
  margins = c(8, 5),
  xlab = "samples", ylab = "genes",
  main = "heatmap.2 plot",
  adjCol = c(0.5, 1),
  offsetRow = 0.5,
  offsetCol = 2.5,
  trace = "none",
  cexRow = 0.5
)
# heatmap.2 加样品注释信息不是很方便。
5. ggplot2包绘制热图
library(ggplot2)
library(reshape2)

# Cluster the rows (genes) and keep the leaf order of the tree.
hc <- hclust(dist(exprs_matrix), method = "complete")
rowInd <- hc$order
# Transpose so that the original columns (samples) are clustered.
hc <- hclust(dist(t(exprs_matrix)), method = "complete")
colInd <- hc$order

# Reorder rows and columns by the clustering result.
# NOTE: this overwrites exprs_matrix, so any code that runs afterwards sees
# the reordered matrix.
exprs_matrix <- exprs_matrix[rowInd, colInd]

# Melt to long format (one row per gene/sample cell), as ggplot2 expects.
dp <- reshape2::melt(exprs_matrix)
colnames(dp) <- c("Gene", "Sample", "Value")

# x is mapped to Sample and y to Gene, so the axis titles must be
# "Samples" (x) and "Genes" (y); they were swapped before.
p <- ggplot(dp, aes(x = Sample, y = Gene, fill = Value)) +
  geom_tile(colour = "white", size = 0) +
  scale_fill_gradient(low = "green", high = "red") + # fill colour scale
  geom_text(aes(label = round(Value, 2)), angle = 45, size = 3) + # cell values
  xlab("Samples") +
  ylab("Genes") +
  labs(title = "ggplot2 heatmap")
print(p)
6. ComplexHeatmap包绘制热图
library(ComplexHeatmap)
library(circlize)

mat <- exprs_matrix

# Value -> colour mapping, defined once before its first use so the script
# runs top to bottom. Sharing one mapping across heatmaps keeps their
# colours directly comparable.
# summary(mat)
# min <- min(mat)
# max <- max(mat)
col_fun <- colorRamp2(c(-4, 0, 4), c("green", "white", "red"))
# col_fun(seq(-3, 3))

# Heatmap with default settings.
Heatmap(mat)

# Clustering is affected by outliers: inject one extreme value.
mat2 <- mat
mat2[1, 1] <- 100000
Heatmap(mat2,
  name = "mat", col = col_fun,
  column_title = "a matrix with outliers"
)

# Original matrix drawn with the shared colour mapping.
Heatmap(mat, name = "mat", col = col_fun)

# A plain colour vector can also be given for a continuous matrix.
Heatmap(mat,
  name = "mat", col = rev(rainbow(10)),
  column_title = "set a color vector for a continuous matrix"
)

# Discrete numeric matrix: colours are given as a vector named by the values.
discrete_mat <- matrix(sample(1:4, 100, replace = TRUE), 10, 10)
colors <- base::structure(1:4, names = c("1", "2", "3", "4")) # black, red, green, blue
Heatmap(discrete_mat,
  name = "mat", col = colors,
  column_title = "a discrete numeric matrix"
)
# name:热图名字
# col:设置方格颜色
# na_col:设置 NA 的颜色
# column_title/row_title:列/行标题
# column_title_side/row_title_side:列/行标题的位置
# show_column_dend/show_row_dend:是否显示列/行聚类树(树状图)
# column_dend_side/row_dend_side:列/行聚类树(树状图)的位置
# clustering_distance_rows/clustering_distance_columns:距离量度
# clustering_method_rows/clustering_method_columns:聚类方法
# Minimal annotation: one discrete variable with automatically chosen colours.
# It is replaced by the fuller example below.
ha <- HeatmapAnnotation(bar = sample(letters[1:3], 10, replace = TRUE))

# Colour mapping for the continuous annotation `foo`, whose values span 1..10.
# The heatmap's own mapping has breaks at -4/0/4, so reusing it would paint
# every value >= 4 the same saturated colour; use breaks matching foo's range.
foo_col_fun <- circlize::colorRamp2(c(0, 5, 10), c("blue", "white", "red"))

# `col` gives the colours for each annotation:
#   - discrete annotation   -> a named vector (value = colour)
#   - continuous annotation -> a colour mapping function
# NA values are drawn with `na_col`.
ha <- HeatmapAnnotation(
  foo = c(1:4, NA, 6:10),
  bar = c(NA, sample(letters[1:3], 9, replace = TRUE)),
  col = list(
    foo = foo_col_fun,
    bar = c("a" = "red", "b" = "green", "c" = "blue")
  ),
  na_col = "black"
)

# Put the annotation on top of the heatmap columns.
Heatmap(mat, name = "mat", top_annotation = ha)
# Split heatmap: cut the gene (row) tree into 4 groups and draw the rows
# in 4 slices.
hc <- hclust(dist(exprs_matrix)) # distances between rows (genes)
group <- cutree(hc, k = 4)
Heatmap(
  exprs_matrix,
  # cluster_within_group() is given the transposed matrix together with the
  # per-gene group labels from cutree().
  cluster_rows = cluster_within_group(t(exprs_matrix), group),
  row_split = 4, # it would be better if also set row_split
  border = TRUE
)
参考:
https://jokergoo.github.io/ComplexHeatmap-reference/book/introduction.html#general-design