DSP分析之热图绘制
library(pheatmap)
library(ggplot2)
library(tidyverse)
library(plyr)
library(readxl)
library(reshape2)
library(ggpubr)
getwd()
#数据预处理
```r
raw_data <- as.data.frame(read_excel('E:/page/GeoMx_DSP/MY_DSP/GroupB/DSP_ICB_GROUP.xlsx',sheet = 'raw_data'))
rownames(raw_data) <- raw_data[,1]
raw_data
#将分组信息单独和数据信息分开整理出来
meta_data <- raw_data[1:4,]
meta_data <- meta_data[,-1]
meta_data['patient',] <- colnames(meta_data)
flite_data <- raw_data[-1:-4,]
rownames(flite_data) <- flite_data[,1]
#这是我们用到的数据
flite_data <- flite_data[,-1]
```r
####将数据格式转换为数字型
for (i in 1:length(colnames(flite_data))){
flite_data[,i] <- as.numeric(flite_data[,i])
}
#展示相关性图-查看样本之间相关性如何
#####
flite_cor <- cor(flite_data)
pheatmap(flite_cor,fontsize = 6)
#####使用melt整理数据格式
mydata_flite_cor.m <-melt(flite_cor)
head(mydata_flite_cor.m)
p_cor <-ggplot(mydata_flite_cor.m, aes(Var1, Var2)) +
geom_tile(aes(fill = value),colour = "blue")
p_cor + scale_fill_gradient(name="Value", low = "white",high = "red") +
theme(axis.text.x = element_text(vjust = 0.5, hjust = 0.5, angle = 0))+
coord_fixed(ratio=1)+
theme(axis.text= element_text(size = 8,family="ARL"))+
theme(plot.margin = unit(c(0.1,0,0,0), unit="mm"))+
labs(x = "Var1", y = "Var2", title = "correlation")+
theme(plot.title = element_text(size = 13,hjust = 0.5,family = "ARL" ))+
theme(legend.key.width=unit(3,'mm'),legend.key.height=unit(3,'cm'))+
theme(legend.title = element_text(size = 8))
get_lower_tri<-function(cormat){
cormat[upper.tri(cormat)] <- NA
return(cormat)
}
#####获取相关性矩阵的下三角矩阵
get_upper_tri <- function(cormat){
cormat[lower.tri(cormat)]<- NA
return(cormat)
}
upper_tri <- get_upper_tri(flite_cor)
upper_tri
upper_tri.m <- melt(upper_tri, na.rm = TRUE)
p_cor.up <-ggplot(upper_tri.m, aes(Var1, Var2)) +
geom_tile(aes(fill = value),colour = "white")
p_cor.up + scale_fill_gradient(name="Value", low = "white",high = "red") +
theme(axis.text.x = element_text(vjust = 0.5, hjust = 0.5, angle = 90))+
coord_fixed(ratio=1)+
theme(axis.text= element_text(size = 8,family="ARL"))+
theme(plot.margin = unit(c(0.1,0,0,0), unit="mm"))+
labs(x = "Var1", y = "Var2", title = "correlation")+
theme(plot.title = element_text(size = 13,hjust = 0.5,family = "ARL" ))+
theme(legend.key.width=unit(3,'mm'),legend.key.height=unit(3,'cm'))+
theme(legend.title = element_text(size = 8))
reorder_cormat <- function(cormat){
dd <- as.dist((1-cormat)/2) ##使用相关性系数作为变量间距离
hc <- hclust(dd)
cormat <-cormat[hc$order, hc$order]
}
mydata_cor.order <- reorder_cormat(flite_cor)
order.upper_tri <- get_upper_tri(mydata_cor.order)
order.upper_tri.m <- melt(order.upper_tri, na.rm = TRUE)
p_cor.order <-ggplot(order.upper_tri.m, aes(Var1, Var2)) +
geom_tile(aes(fill = value),colour = "white")
p_cor.order + scale_fill_gradient(name="Value", low = "blue",high = "red") +
theme(axis.text.x = element_text(vjust = 0.5, hjust = 0.5, angle = 90))+
coord_fixed(ratio=1)+
theme(axis.text= element_text(size = 8,family="ARL"))+
theme(plot.margin = unit(c(0.1,0,0,0), unit="mm"))+
labs(x = "Var1", y = "Var2", title = "correlation")+
theme(plot.title = element_text(size = 13,hjust = 0.5,family = "ARL" ))+
theme(legend.key.width=unit(3,'mm'),legend.key.height=unit(3,'cm'))+
theme(legend.title = element_text(size = 8))
根据样本之间的相关性好坏可以整体上看是否有ROI“不合群”从而进行甄选。
结果如下
#筛选出特征marker比较差异
#Part I nike_gene
#####
marker_nike <-c('CD3D','CD4','CD8','NCAM1','CD68','CD94','CD20','CD206')
marker_nike_data <- flite_data[which(rownames(flite_data)%in%marker_nike),]
#####
#展示热图
#####
annotation_col <- data.frame(t(meta_data))
#rownames(annotation_row) <- rownames(heatmap.marker)
ann_colors <- list(ROI = c('Border CD57' = "#00CCCC",'Tumor CD57' = "#FF9999",
'Border CD57 (CD3)'= "#00CCCC",'Tumor CD57 (CD3)' = "#FF9999"),
Gender = c(man = 'blue'),
age = c('46' = 'orange','48' = 'yellow','52' = 'green','68' = 'black','67' = 'grey'),
Therpy = c(PD = '#FFCC33',PR = '#66CC33'))
###绘图区域
#热图先看一下基因表达情况
#以PD和PR来划分
pdf(file = 'heatmap_nike.pdf',width = 12,height = 6)
pheatmap(marker_nike_data,scale = "row",
color = colorRampPalette(c("#3333FF","#FFFF33"))(1000),
cluster_cols = F,cluster_rows = T,
annotation_col = annotation_col,annotation_colors = ann_colors,
#annotation_row = annotation_row,
cellwidth = 10, cellheight = 10,angle_col = "90",treeheight_row = 13,treeheight_col = 15,
fontsize = 10,fontsize_col = 10,fontsize_row = 10)
GGPLOT绘图主题
##绘图主题
mytheme <- theme(plot.title=element_text(face="plain",
size="13", color="black"),
axis.title=element_text(face="plain",
size=12, color="black"),
axis.text=element_text(face="plain", size=9,
color="black"),
axis.title.x = element_text(size = "15"),
axis.title.y = element_text(size = "15"),
panel.background=element_rect(fill="white",
color="black"),
panel.grid.major.y=element_line(color="white",
linetype=1),
panel.grid.minor.y=element_line(color="white",
linetype=2),
panel.grid.minor.x=element_blank(),
legend.key.size = unit(.3,"inches"),
legend.position="top")+
theme(legend.text=element_text(size=13))+theme(legend.title = element_text(size = 13))
再绘制一个箱图比较两组之间某些基因表达是否具有统计学差异
对数据先整理成ggplot绘制箱图的格式
####统计图处理
nike_data <- melt(marker_nike_data)
nike_data$gene <- rownames(marker_nike_data)
colnames(nike_data) <- c('patient','Value','Gene')
nike_data_plot <- merge(nike_data,t(meta_data),by = 'patient' )
nike_data_plot$ROI[which(nike_data_plot$ROI=='Border CD57 (CD3)')] <- 'Border CD57'
nike_data_plot$ROI[which(nike_data_plot$ROI=='Tumor CD57 (CD3)')] <- 'Tumor CD57'
绘图代码
pdf(file = 'nike_PD_Border_vs_Tumor.pdf',width = 8,height = 6)
ggplot(nike_data_plot[which(nike_data_plot$Therpy =='PD'),],
aes(x = Gene,y = Value,fill = ROI ))+
geom_boxplot()+
geom_boxplot(outlier.colour="red", outlier.shape=8, outlier.size=4)+
labs(title="ICB_PD", x='Nike Markers', y="Normalized Counts") +
stat_compare_means(label = "p.signif",method = "t.test")+
#ylim(c(-3,15))+
guides(fill=guide_legend(title=NULL))+#隐藏图例标题
#scale_fill_discrete(name="Group",breaks=c("Covid19", "Non-Covid19"))
mytheme
dev.off()
结果如下
对于单样本包含多种类型细胞时候需要去卷积来查看不同类型细胞的分布占比。付Cibersort使用的官网链接。
https://cibersort.stanford.edu/